diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..06ad2cf --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,11 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^cran-comments\.md$ +^\.github$ +^README\.Rmd$ +^\.vscode$ +^vscode-install\.r$ +^benchmarks$ +^CRAN-SUBMISSION$ +^LICENSE\.md$ +^dev$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..0bc2c1d --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,46 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + - run: sudo apt-get update && sudo apt-get install -y texlive texlive-latex-extra + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true diff --git a/.gitignore b/.gitignore index a43e713..81ad48f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,12 +12,15 @@ *.cache *~ # emacs backup # Rproj files .Rproj.user -*.Rproj -*.Rbuildignore # Build files config.* src/config.h src/texput.log 
src/Makevars +## Comment +#*.Rproj +#*.Rbuildignore +.Rhistory +.vscode diff --git a/DESCRIPTION b/DESCRIPTION index 37fd746..beafe8a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,21 @@ Package: lfe -Version: 2.8-4 -Date: 2019-12-12 +Version: 2.9-1 +Date: 2023-04-03 Title: Linear Group Fixed Effects -Authors@R: c(person("Simen", "Gaure", email="Simen.Gaure@frisch.uio.no", role=c("aut","cre"), +Authors@R: c(person("Simen", "Gaure", email="Simen.Gaure@frisch.uio.no", role=c("aut"), comment=c(ORCID="https://orcid.org/0000-0001-7251-8747")), person("Grant","McDermott", email="grantmcd@uoregon.edu", role="ctb"), + person(given = "Mauricio", + family = "Vargas", + role = "ctb", + email = "mavargas11@uc.cl", + comment = c(ORCID = "0000-0003-1017-7574")), + person(given = "Paulo", + family = "Alcazar", + role = "ctb"), person("Karl", "Dunkle Werner", role="ctb"), - person("Matthieu","Stigler",role="ctb"), + person("Matthieu","Stigler", email = "Matthieu.Stigler@gmail.com", role= c("ctb", "cre"), + comment =c(ORCID="0000-0002-6802-4290")), person("Daniel","Lüdecke",email="mail@danielluedecke.de",role="ctb")) Copyright: 2011-2019, Simen Gaure Depends: R (>= 2.15.2), Matrix (>= 1.1-2) @@ -25,13 +34,15 @@ VignetteBuilder: knitr ByteCompile: yes Description: Transforms away factors with many levels prior to doing an OLS. Useful for estimating linear models with multiple group fixed effects, and for - estimating linear models which uses factors with many levels as pure control variables. + estimating linear models which uses factors with many levels as pure control variables. See Gaure (2013) Includes support for instrumental variables, conditional F statistics for weak instruments, - robust and multi-way clustered standard errors, as well as limited mobility bias correction. + robust and multi-way clustered standard errors, as well as limited mobility bias correction (Gaure 2014 ). 
+ WARNING: This package is NOT under active development anymore, no further improvements are to be expected, and the package is at risk of being removed from CRAN. License: Artistic-2.0 Classification/JEL: C13, C23, C60 Classification/MSC: 62J05, 65F10, 65F50 -URL: https://github.com/sgaure/lfe -BugReports: https://github.com/sgaure/lfe/issues +URL: https://github.com/MatthieuStigler/lfe +BugReports: https://github.com/MatthieuStigler/lfe/issues Encoding: UTF-8 -RoxygenNote: 7.0.2 +RoxygenNote: 7.2.3 +Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 8b3bce8..86d359c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,10 +8,13 @@ S3method(logLik,felm) S3method(model.frame,felm) S3method(model.matrix,felm) S3method(nobs,felm) +S3method(predict,fepois) S3method(print,felm) S3method(print,summary.felm) +S3method(print,summary.fepois) S3method(residuals,felm) S3method(summary,felm) +S3method(summary,fepois) S3method(update,felm) S3method(vcov,felm) S3method(weights,felm) @@ -27,6 +30,7 @@ export(condfstat) export(demeanlist) export(efactory) export(felm) +export(fepois) export(fevcov) export(fixedse) export(getfe) @@ -41,6 +45,7 @@ export(waldtest) import('stats') import(Formula) importClassesFrom(Matrix,sparseMatrix) +importFrom(Formula,Formula) importFrom(Matrix,Cholesky) importFrom(Matrix,Diagonal) importFrom(Matrix,colSums) @@ -53,6 +58,7 @@ importFrom(Matrix,solve) importFrom(Matrix,t) importFrom(Matrix,tcrossprod) importFrom(methods,as) +importFrom(methods,is) importFrom(sandwich,bread) importFrom(sandwich,estfun) importFrom(xtable,xtable) diff --git a/R/bccorr.R b/R/bccorr.R index a9ad718..6403c09 100644 --- a/R/bccorr.R +++ b/R/bccorr.R @@ -1,13 +1,14 @@ # $Id: bccorr.R 2004 2016-04-21 10:31:20Z sgaure $ narowsum <- function(x, group) { - opt <- options(warn=-1) - res <- try(rowsum(x,group), silent=TRUE) - options(opt) + # opt <- options(warn=-1) ## CRAN request to rather use suppressWarnings() + res <- suppressWarnings(try(rowsum(x, group), 
silent = TRUE)) + # options(opt) narow <- is.na(rownames(res)) - if(any(narow)) - res[!narow,,drop=FALSE] - else - res + if (any(narow)) { + res[!narow, , drop = FALSE] + } else { + res + } } # try without reference @@ -34,11 +35,11 @@ narowsum <- rowsum #' and \eqn{F} are matrices with dummy encoded factors, one application of \pkg{lfe} is to #' study the correlation \eqn{cor(D\theta, F\psi)}. However, if we use #' estimates for \eqn{\theta} and \eqn{\psi}, the resulting correlation is biased. -#' The function \code{bccorr} computes a bias corrected correlation +#' The function `bccorr` computes a bias corrected correlation #' as described in \cite{Gaure (2014)}. #' @param est an object of class '"felm"', the result of a call to -#' \code{\link{felm}(keepX=TRUE)}. -#' @param alpha a data frame, the result of a call to \code{\link{getfe}}. +#' `[felm](keepX=TRUE)`. +#' @param alpha a data frame, the result of a call to [getfe()]. #' @param corrfactors integer or character vector of length 2. The factors to #' correlate. The default is fine if there are only two factors in the model. #' @param nocovar logical. Assume no other covariates than the two @@ -48,11 +49,11 @@ narowsum <- rowsum #' @param lhs character. Name of left hand side if multiple left hand sides. 
#' @return -#' \code{bccorr} returns a named integer vector with the following fields: +#' `bccorr` returns a named integer vector with the following fields: #' #' \item{corr}{the bias corrected correlation.} #' \item{v1}{the bias corrected variance for the first factor specified -#' by \code{corrfactors}.} +#' by `corrfactors`.} #' \item{v2}{the bias corrected variance for the second factor.} #' \item{cov}{the bias corrected covariance between the two factors.} #' \item{d1}{the bias correction for the first factor.} @@ -65,63 +66,63 @@ narowsum <- rowsum #' @details #' The bias expressions from \cite{Andrews et al.} are of the form \eqn{tr(AB^{-1}C)} #' where \eqn{A}, \eqn{B}, and \eqn{C} are matrices too large to be handled -#' directly. \code{bccorr} estimates the trace by using the formula \eqn{tr(M) = E(x^t M x)} +#' directly. `bccorr` estimates the trace by using the formula \eqn{tr(M) = E(x^t M x)} #' where x is a vector with coordinates drawn uniformly from the set \eqn{\{-1,1\}}. #' More specifically, the expectation is estimated by #' sample means, i.e. in each sample a vector x is drawn, the #' equation \eqn{Bv = Cx} is solved by a conjugate gradient method, and the -#' real number \eqn{x^t Av} is computed. -#' -#' There are three bias corrections, for the variances of \eqn{D\theta} (\code{vD}) and -#' \eqn{F\psi} (\code{vF}), and their covariance (\code{vDF}).The correlation is computed as -#' \code{rho <- vDF/sqrt(vD*vF)}. The variances are estimated to a -#' relative tolerance specified by the argument \code{tol}. The covariance -#' bias is estimated to an absolute tolerance in the correlation \code{rho} -#' (conditional on the already bias corrected \code{vD} and \code{vF}) specified by -#' \code{tol}. The CG algortithm does not need to be exceedingly precise, +#' real number \eqn{x^t Av} is computed. 
+#' +#' There are three bias corrections, for the variances of \eqn{D\theta} (`vD`) and +#' \eqn{F\psi} (`vF`), and their covariance (`vDF`).The correlation is computed as +#' `rho <- vDF/sqrt(vD*vF)`. The variances are estimated to a +#' relative tolerance specified by the argument `tol`. The covariance +#' bias is estimated to an absolute tolerance in the correlation `rho` +#' (conditional on the already bias corrected `vD` and `vF`) specified by +#' `tol`. The CG algorithm does not need to be exceedingly precise, #' it is terminated when the solution reaches a precision which is -#' sufficient for the chosen precision in \code{vD, vF, vDF}. -#' -#' If \code{est} is the result of a weighted \code{\link{felm}} estimation, +#' sufficient for the chosen precision in `vD, vF, vDF`. +#' +#' If `est` is the result of a weighted [felm()] estimation, #' the variances and correlations are weighted too. #' @note #' Bias correction for IV-estimates are not supported as of now. -#' -#' Note that if \code{est} is the result of a call to \code{\link{felm}} -#' with \code{keepX=FALSE} (the default), the correlation will be computed +#' +#' Note that if `est` is the result of a call to [felm()] +#' with `keepX=FALSE` (the default), the correlation will be computed #' as if the covariates X are independent of the two factors. This will be #' faster (typically by a factor of approx. 4), and possibly wronger. -#' +#' #' Note also that the computations performed by this function are #' non-trivial, they may take quite some time. It would be wise to start #' out with quite liberal tolerances, e.g. \cite{tol=0.1}, to #' get an idea of the time requirements. -#' +#' #' The algorithm used is not very well suited for small datasets with only #' a few thousand levels in the factors. 
-#' @seealso \code{\link{fevcov}} +#' @seealso [fevcov()] #' @examples #' x <- rnorm(500) #' x2 <- rnorm(length(x)) -#' +#' #' ## create individual and firm -#' id <- factor(sample(40,length(x),replace=TRUE)) -#' firm <- factor(sample(30,length(x),replace=TRUE,prob=c(2,rep(1,29)))) -#' foo <- factor(sample(20,length(x),replace=TRUE)) +#' id <- factor(sample(40, length(x), replace = TRUE)) +#' firm <- factor(sample(30, length(x), replace = TRUE, prob = c(2, rep(1, 29)))) +#' foo <- factor(sample(20, length(x), replace = TRUE)) #' ## effects #' id.eff <- rnorm(nlevels(id)) #' firm.eff <- rnorm(nlevels(firm)) #' foo.eff <- rnorm(nlevels(foo)) #' ## left hand side -#' y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + foo.eff[foo] + rnorm(length(x)) -#' +#' y <- x + 0.25 * x2 + id.eff[id] + firm.eff[firm] + foo.eff[foo] + rnorm(length(x)) +#' #' # make a data frame -#' fr <- data.frame(y,x,x2,id,firm,foo) +#' fr <- data.frame(y, x, x2, id, firm, foo) #' ## estimate and print result -#' est <- felm(y ~ x+x2|id+firm+foo, data=fr, keepX=TRUE) +#' est <- felm(y ~ x + x2 | id + firm + foo, data = fr, keepX = TRUE) #' # find bias corrections #' bccorr(est) @@ -129,111 +130,120 @@ narowsum <- rowsum #' Gaure, S. (2014), \cite{Correlation bias correction in two-way #' fixed-effects linear regression}, Stat 3(1):379:390, 2014. 
#' @export -bccorr <- function(est, alpha=getfe(est), corrfactors=1L:2L, - nocovar=(length(est$X)==0) && length(est$fe)==2, - tol=0.01, maxsamples=Inf, lhs=NULL) { - - if(nlevels(est$cfactor) > 1) stop('Data should have just a single connected component') - if(length(est$fe) == 2 && nlevels(est$cfactor) != 1) stop('Bias correction only makes sense on data with 1 component') - if(length(est$fe) < 2) stop('Bias correction only makes sense for two factors') - if(length(corrfactors) != 2) stop('corrfactors must have length 2') - if(is.character(corrfactors)) corrfactors <- match(corrfactors, names(est$fe)) - if(min(corrfactors) < 1 || max(corrfactors) > length(est$fe)) - stop('corrfactors specifies too small or large index') - if(!('fe' %in% colnames(alpha))) stop('alpha must have an "fe" column') -# if(!is.null(est$weights)) warning("Bias correction with weights not yet fully correct") - -# fe <- est$fe +bccorr <- function(est, alpha = getfe(est), corrfactors = 1L:2L, + nocovar = (length(est$X) == 0) && length(est$fe) == 2, + tol = 0.01, maxsamples = Inf, lhs = NULL) { + if (nlevels(est$cfactor) > 1) stop("Data should have just a single connected component") + if (length(est$fe) == 2 && nlevels(est$cfactor) != 1) stop("Bias correction only makes sense on data with 1 component") + if (length(est$fe) < 2) stop("Bias correction only makes sense for two factors") + if (length(corrfactors) != 2) stop("corrfactors must have length 2") + if (is.character(corrfactors)) corrfactors <- match(corrfactors, names(est$fe)) + if (min(corrfactors) < 1 || max(corrfactors) > length(est$fe)) { + stop("corrfactors specifies too small or large index") + } + if (!("fe" %in% colnames(alpha))) stop('alpha must have an "fe" column') + # if(!is.null(est$weights)) warning("Bias correction with weights not yet fully correct") + + # fe <- est$fe f1 <- est$fe[[corrfactors[[1]]]] f2 <- est$fe[[corrfactors[[2]]]] nf1 <- names(est$fe)[[corrfactors[[1]]]] nf2 <- names(est$fe)[[corrfactors[[2]]]] - 
if(!is.null(attr(f1,'x',exact=TRUE))) stop('Interacted factors "',nf1,'" are not supported') - if(!is.null(attr(f2,'x',exact=TRUE))) stop('Interacted factors "',nf2,'" are not supported') - effnam <- 'effect' - if(length(est$lhs) == 1) lhs <- est$lhs - if(!('effect' %in% colnames(alpha))) { - if(is.null(lhs)) - stop('Please specify lhs=[one of ',paste(est$lhs, collapse=','),']') - effnam <- paste('effect',lhs,sep='.') - if(!(effnam %in% colnames(alpha))) { + if (!is.null(attr(f1, "x", exact = TRUE))) stop('Interacted factors "', nf1, '" are not supported') + if (!is.null(attr(f2, "x", exact = TRUE))) stop('Interacted factors "', nf2, '" are not supported') + effnam <- "effect" + if (length(est$lhs) == 1) lhs <- est$lhs + if (!("effect" %in% colnames(alpha))) { + if (is.null(lhs)) { + stop("Please specify lhs=[one of ", paste(est$lhs, collapse = ","), "]") + } + effnam <- paste("effect", lhs, sep = ".") + if (!(effnam %in% colnames(alpha))) { stop("Can't find effect-column in alpha") } } - resid <- est$residuals[,lhs] - d1 <- alpha[alpha['fe']==nf1,effnam][f1] - d2 <- alpha[alpha['fe']==nf2,effnam][f2] - w <- if(is.null(est$weights)) 1.0 else est$weights - if(is.null(est$weights)) { + resid <- est$residuals[, lhs] + d1 <- alpha[alpha["fe"] == nf1, effnam][f1] + d2 <- alpha[alpha["fe"] == nf2, effnam][f2] + w <- if (is.null(est$weights)) 1.0 else est$weights + if (is.null(est$weights)) { var1 <- var(d1) var2 <- var(d2) - cov12 <- cov(d1,d2) + cov12 <- cov(d1, d2) + } else { + var1 <- wvar(d1, w^2) + var2 <- wvar(d2, w^2) + cov12 <- wcov(d1, d2, w^2) + } + delta1 <- varbias(corrfactors[[1]], est, tol, var1, maxsamples, resid = resid, weights = est$weights) + if (is.null(est$weights)) { + epsvar <- sum(resid^2) / est$df } else { - var1 <- wvar(d1,w^2) - var2 <- wvar(d2,w^2) - cov12 <- wcov(d1,d2,w^2) + epsvar <- sum(w^2 * resid^2) / est$df / sum(w^2) } - delta1 <- varbias(corrfactors[[1]],est,tol,var1,maxsamples,resid=resid,weights=est$weights) - 
if(is.null(est$weights)) - epsvar <- sum(resid^2)/est$df - else - epsvar <- sum(w^2*resid^2)/est$df/sum(w^2) - if(nocovar) { + if (nocovar) { f1 <- est$fe[[corrfactors[[1]]]] f2 <- est$fe[[corrfactors[[2]]]] N <- length(f1) - delta2 <- delta1 - epsvar*(nlevels(f1)-nlevels(f2))/N - delta12 <- epsvar*nlevels(f1)/N - delta1 + delta2 <- delta1 - epsvar * (nlevels(f1) - nlevels(f2)) / N + delta12 <- epsvar * nlevels(f1) / N - delta1 } else { - delta2 <- varbias(corrfactors[[2]],est,tol,var2,maxsamples, resid=resid,weights=est$weights) - eps <- -tol*sqrt((var1-delta1)*(var2-delta2)) - delta12 <- covbias(corrfactors,est,eps,maxsamples=maxsamples,resid=resid, weights=est$weights) + delta2 <- varbias(corrfactors[[2]], est, tol, var2, maxsamples, resid = resid, weights = est$weights) + eps <- -tol * sqrt((var1 - delta1) * (var2 - delta2)) + delta12 <- covbias(corrfactors, est, eps, maxsamples = maxsamples, resid = resid, weights = est$weights) } vartheta <- var1 - delta1 varpsi <- var2 - delta2 covtp <- cov12 - delta12 - c(corr=covtp/sqrt(vartheta*varpsi), - v1=vartheta, - v2=varpsi, - cov=covtp, - d1=delta1, - d2=delta2, - d12=delta12) - + c( + corr = covtp / sqrt(vartheta * varpsi), + v1 = vartheta, + v2 = varpsi, + cov = covtp, + d1 = delta1, + d2 = delta2, + d12 = delta12 + ) } -halftrace <- function(x, f, restf, MFX, tol, lmean, name, weights=NULL) { +halftrace <- function(x, f, restf, MFX, tol, lmean, name, weights = NULL) { w <- weights - if(is.null(w)) {ww <- ww2 <- 1.0} else {ww2 <- w^2; ww <- w} + if (is.null(w)) { + ww <- ww2 <- 1.0 + } else { + ww2 <- w^2 + ww <- w + } - if(is.null(MFX)) { + if (is.null(MFX)) { invfun <- function(v) { - Crowsum(ww2*demeanlist(v[f,], restf, weights=w), f) + crowsum(ww2 * demeanlist(v[f, ], restf, weights = w), f) } } else { invfun <- function(v) { - Crowsum(ww2*demeanlist(demeanlist(ww*v[f,],MFX)/ww, restf, weights=w), f) + crowsum(ww2 * demeanlist(demeanlist(ww * v[f, ], MFX) / ww, restf, weights = w), f) } } - DtM1x <- 
Crowsum(ww*demeanlist(x,lmean, weights=weights, scale=FALSE), f) + DtM1x <- crowsum(ww * demeanlist(x, lmean, weights = weights, scale = FALSE), f) # we use absolute tolerance, mctrace wil give us a trtol. # however, we square our result: (Rx + err)^2 = Rx^2 + 2*err*Rx + err^2 # so to get 2*err*Rx < tol, we must have err < tol/(2*Rx), i.e. # we should use a relative tolerance -# tol1 <- -tol/sqrt(colSums(DtM1x^2)) + # tol1 <- -tol/sqrt(colSums(DtM1x^2)) tol1 <- tol -# message('cgsolve: tol1 ',tol1, ' tol ',tol) - v <- cgsolve(invfun, DtM1x, eps=tol1,name=name) - if(!is.null(MFX)) - Rx <- ww2*demeanlist(demeanlist(ww*v[f,],MFX)/ww, restf, weights=w) - else - Rx <- ww*demeanlist(v[f,],restf, weights=weights) - -# message("|v| = ",mean(sqrt(colSums(v^2))), '|Rx|=',mean(sqrt(colSums(Rx^2)))) + # message('cgsolve: tol1 ',tol1, ' tol ',tol) + v <- cgsolve(invfun, DtM1x, eps = tol1, name = name) + if (!is.null(MFX)) { + Rx <- ww2 * demeanlist(demeanlist(ww * v[f, ], MFX) / ww, restf, weights = w) + } else { + Rx <- ww * demeanlist(v[f, ], restf, weights = weights) + } + + # message("|v| = ",mean(sqrt(colSums(v^2))), '|Rx|=',mean(sqrt(colSums(Rx^2)))) Rx } @@ -244,12 +254,12 @@ halftrace <- function(x, f, restf, MFX, tol, lmean, name, weights=NULL) { # that is, the bias is similar as before, but use M_W instead of M_1 # and Wx instead of x (i.e. WDtheta instead of Dtheta). # remember that the weights in the felm-object are the square roots. 
-varbias <- function(index,est,tol=0.01,bvar, maxsamples=Inf, - robust=!is.null(est$clustervar), resid, weights=NULL,dfadj=1) { - if(length(index) != 1) stop("index must have length 1") - if(!is.null(est$stage1)) stop("Bias correction with IV not supported yet") -# on.exit({rm(list=ls()); gc()}) - if(length(tol)==1) { +varbias <- function(index, est, tol = 0.01, bvar, maxsamples = Inf, + robust = !is.null(est$clustervar), resid, weights = NULL, dfadj = 1) { + if (length(index) != 1) stop("index must have length 1") + if (!is.null(est$stage1)) stop("Bias correction with IV not supported yet") + # on.exit({rm(list=ls()); gc()}) + if (length(tol) == 1) { tracetol <- tol cgtf <- 1 } else { @@ -259,118 +269,124 @@ varbias <- function(index,est,tol=0.01,bvar, maxsamples=Inf, X <- est$X f <- est$fe[[index]] restf <- est$fe[-index] - name <- paste('var(',names(est$fe)[[index]],')', sep='') + name <- paste("var(", names(est$fe)[[index]], ")", sep = "") N <- length(f) nlev <- nlevels(f) w <- weights - if(is.null(w)) { + if (is.null(w)) { wc <- ww <- ww2 <- 1.0 } else { - w <- w/sqrt(sum(w^2)) - ww2 <- w^2; ww <- w; - wc <- 1/(1 - sum(ww2^2)) + w <- w / sqrt(sum(w^2)) + ww2 <- w^2 + ww <- w + wc <- 1 / (1 - sum(ww2^2)) } - -# First, make a factor list for projecting out mean. - fmean <- factor(rep(1,N)) + + # First, make a factor list for projecting out mean. + fmean <- factor(rep(1, N)) lmean <- list(fmean) -# project out X and F, i.e. everything but f1 -# use M_{F,X} = M_F M_{M_F X} -# precompute and orthonormalize M_F X + # project out X and F, i.e. 
everything but f1 + # use M_{F,X} = M_F M_{M_F X} + # precompute and orthonormalize M_F X - if(length(X) > 0) { + if (length(X) > 0) { # Find NA's in the coefficients, remove corresponding variables from X - bad <- apply(est$coefficients,1,anyNA) - if(any(bad)) X <- X[,!bad,drop=FALSE] - MFX <- list(structure(fmean,x=orthonormalize(demeanlist(X,restf, - weights=w,scale=c(TRUE,FALSE))))) + bad <- apply(est$coefficients, 1, anyNA) + if (any(bad)) X <- X[, !bad, drop = FALSE] + MFX <- list(structure(fmean, x = orthonormalize(demeanlist(X, restf, + weights = w, scale = c(TRUE, FALSE) + )))) invfun <- function(v) { - Crowsum(ww*demeanlist(demeanlist(ww*v[f,],MFX), restf, weights=w, scale=FALSE), f) + crowsum(ww * demeanlist(demeanlist(ww * v[f, ], MFX), restf, weights = w, scale = FALSE), f) } } else { MFX <- NULL invfun <- function(v) { - Crowsum(ww*demeanlist(v[f,], restf, weights=w,scale=c(TRUE,FALSE)), f) + crowsum(ww * demeanlist(v[f, ], restf, weights = w, scale = c(TRUE, FALSE)), f) } } - if(is.null(w)) - epsvar <- sum(resid^2)/est$df - else - epsvar <- sum(ww2*resid^2)*N/est$df + if (is.null(w)) { + epsvar <- sum(resid^2) / est$df + } else { + epsvar <- sum(ww2 * resid^2) * N / est$df + } vvfoo <- epsvar - if(robust) { + if (robust) { # residuals present, do cluster robust correction # create all the cluster interactions, so we don't have to do # it each time in the iteration docluster <- !is.null(est$clustervar) - toladj <- sqrt(sum((ww*resid)^2)) - if(docluster) { + toladj <- sqrt(sum((ww * resid)^2)) + if (docluster) { d <- length(est$clustervar) cia <- list() - for(i in 1:(2^d-1)) { + for (i in 1:(2^d - 1)) { # Find out which ones to interact iac <- as.logical(intToBits(i))[1:d] # interact the factors - cia[[i]] <- factor(do.call(paste,c(est$clustervar[iac],sep='\004'))) + cia[[i]] <- factor(do.call(paste, c(est$clustervar[iac], sep = "\004"))) } } - wwres <- ww*resid - trfun <- function(x,trtol) { + wwres <- ww * resid + trfun <- function(x, trtol) { # 
return crude estimate of the trace - if(trtol == 0) return(abs(nlev)) -# on.exit({rm(list=ls()); gc()}) + if (trtol == 0) { + return(abs(nlev)) + } + # on.exit({rm(list=ls()); gc()}) # since we square our result, we should use sqrt(trtol)/2 as a tolerance # trtol is the absolute tolerance, but halftrace is squared, we should # really use a relative tolerance, we use the square root of the tracetol -# message('trtol=',trtol, ' N ',N,' toladj ',toladj) - Rx <- halftrace(x, f, restf, MFX, -sqrt(trtol/toladj), lmean, name, weights=w) -# Rx <- halftrace(x, f, restf, MFX, sqrt(trtol)/2, lmean, name, weights=w) -# Rx <- halftrace(x, f, restf, MFX, trtol, lmean, name, weights=w) + # message('trtol=',trtol, ' N ',N,' toladj ',toladj) + Rx <- halftrace(x, f, restf, MFX, -sqrt(trtol / toladj), lmean, name, weights = w) + # Rx <- halftrace(x, f, restf, MFX, sqrt(trtol)/2, lmean, name, weights=w) + # Rx <- halftrace(x, f, restf, MFX, trtol, lmean, name, weights=w) # now apply cluster stuff # first, scale with (weighted) residuals -# + # .Call(C_scalecols, Rx, wwres) - if(!docluster) { + if (!docluster) { # It's heteroscedastic return(colSums(Rx * Rx)) } else { # it's one or more clusters, do the Cameron et al detour - result <- vector('numeric',ncol(Rx)) - for(i in 1:(2^d-1)) { + result <- vector("numeric", ncol(Rx)) + for (i in 1:(2^d - 1)) { ia <- cia[[i]] - b <- Crowsum(Rx,ia) + b <- crowsum(Rx, ia) # odd number is positive, even is negative - sgn <- 2*(sum(as.logical(intToBits(i))[1:d]) %% 2) - 1 - adj <- sgn*dfadj*nlevels(ia)/(nlevels(ia)-1) - result <- result + adj* colSums(b * b) + sgn <- 2 * (sum(as.logical(intToBits(i))[1:d]) %% 2) - 1 + adj <- sgn * dfadj * nlevels(ia) / (nlevels(ia) - 1) + result <- result + adj * colSums(b * b) rm(b) -# result <- result + vvfoo*adj* colSums(Rx * Rx) + # result <- result + vvfoo*adj* colSums(Rx * Rx) } return(result) } } - epsvar <- 1 # now incorporated in the robust variance matrix, so don't scale + epsvar <- 1 # now incorporated in 
the robust variance matrix, so don't scale } else { - - trfun <- function(x,trtol) { + trfun <- function(x, trtol) { # return crude estimate of the trace - if(trtol == 0) return(abs(nlev)) -# on.exit({rm(list=ls()); gc()}) - DtM1x <- Crowsum(ww*demeanlist(unnamed(x),lmean,weights=w,scale=FALSE), f) + if (trtol == 0) { + return(abs(nlev)) + } + # on.exit({rm(list=ls()); gc()}) + DtM1x <- crowsum(ww * demeanlist(unnamed(x), lmean, weights = w, scale = FALSE), f) # we use absolute tolerance, mctrace wil give us a trtol. # we divide by the L2-norm of DtM1x, since we take the # inner product with this afterwards - tol1 <- -trtol/cgtf/sqrt(colSums(DtM1x^2))/2 - v <- cgsolve(invfun, DtM1x, eps=tol1,name=name) + tol1 <- -trtol / cgtf / sqrt(colSums(DtM1x^2)) / 2 + v <- cgsolve(invfun, DtM1x, eps = tol1, name = name) colSums(DtM1x * v) } } - attr(trfun,'IP') <- TRUE + attr(trfun, "IP") <- TRUE epsvar <- epsvar * wc # If we have weights, we should not divide the trace by N @@ -383,18 +399,20 @@ varbias <- function(index,est,tol=0.01,bvar, maxsamples=Inf, # where bvar is the biased variance epsfun <- function(tr) { - aa <- N*bvar - epsvar*tr - if(aa < 0) { - -abs(tracetol)*0.5*N*bvar/epsvar + aa <- N * bvar - epsvar * tr + if (aa < 0) { + -abs(tracetol) * 0.5 * N * bvar / epsvar } else { - -abs(tracetol)*aa/epsvar + -abs(tracetol) * aa / epsvar } } # the tolerance before mctrace has got a clue about where we are, # is a problem. If the bias is very large compared to the variance, we will - # be in trouble. - res <- epsvar*mctrace(trfun,N=N,tol=epsfun, trname=name, - maxsamples=maxsamples)/N + # be in trouble. + res <- epsvar * mctrace(trfun, + N = N, tol = epsfun, trname = name, + maxsamples = maxsamples + ) / N } @@ -402,19 +420,20 @@ varbias <- function(index,est,tol=0.01,bvar, maxsamples=Inf, # covariance. In this case, the biased covariance (bcov) and residual # variance (epsvar) must be specified. 
A negative tolerance is an # absolute tolerance -covbias <- function(index,est,tol=0.01, maxsamples=Inf, resid, weights=NULL, - robust=!is.null(est$clustervar)) { - if(length(index) != 2) stop("index must have length 2") - if(!is.null(est$stage1)) stop("Bias correction with IV not supported yet") - if(length(est$fe) < 2) stop("fe must have length >= 2") -# on.exit({rm(list=ls()); gc()}) +covbias <- function(index, est, tol = 0.01, maxsamples = Inf, resid, weights = NULL, + robust = !is.null(est$clustervar)) { + if (length(index) != 2) stop("index must have length 2") + if (!is.null(est$stage1)) stop("Bias correction with IV not supported yet") + if (length(est$fe) < 2) stop("fe must have length >= 2") + # on.exit({rm(list=ls()); gc()}) w <- weights - if(is.null(w)) { + if (is.null(w)) { wc <- ww2 <- ww <- 1 } else { - w <- w/sqrt(sum(w^2)) - ww <- w; ww2 <- w^2 - wc <- sum(w^2)/(sum(w^2)^2 - sum(w^4)) + w <- w / sqrt(sum(w^2)) + ww <- w + ww2 <- w^2 + wc <- sum(w^2) / (sum(w^2)^2 - sum(w^4)) } X <- est$X f1 <- est$fe[[index[[1]]]] @@ -422,136 +441,154 @@ covbias <- function(index,est,tol=0.01, maxsamples=Inf, resid, weights=NULL, nlev1 <- nlevels(f1) nlev2 <- nlevels(f2) N <- length(f1) - name <- paste('cov(',paste(names(est$fe)[index],collapse=','),')',sep='') - name1 <- paste(name,names(est$fe)[index[[1]]],sep='.') - name2 <- paste(name,names(est$fe)[index[[2]]], sep='.') + name <- paste("cov(", paste(names(est$fe)[index], collapse = ","), ")", sep = "") + name1 <- paste(name, names(est$fe)[index[[1]]], sep = ".") + name2 <- paste(name, names(est$fe)[index[[2]]], sep = ".") no2list <- est$fe[-index[[2]]] no1list <- est$fe[-index[[1]]] restf <- est$fe[-index] - fmean <- factor(rep(1,N)) + fmean <- factor(rep(1, N)) lmean <- list(fmean) - if(is.null(w)) - epsvar <- sum(resid^2)/est$df - else - epsvar <- sum(ww2*resid^2)*N/est$df + if (is.null(w)) { + epsvar <- sum(resid^2) / est$df + } else { + epsvar <- sum(ww2 * resid^2) * N / est$df + } - if(length(X) > 0) { - 
bad <- apply(est$coefficients,1,anyNA) - if(any(bad)) X <- X[,!bad,drop=FALSE] + if (length(X) > 0) { + bad <- apply(est$coefficients, 1, anyNA) + if (any(bad)) X <- X[, !bad, drop = FALSE] } MDX <- MFX <- NULL - if(length(X) > 0) { - MDX <- list(structure(fmean,x=orthonormalize(demeanlist(X, - no2list,weights=w, scale=c(TRUE,FALSE))))) - MFX <- list(structure(fmean,x=orthonormalize(demeanlist(X, - no1list,weights=w, scale=c(TRUE,FALSE))))) + if (length(X) > 0) { + MDX <- list(structure(fmean, x = orthonormalize(demeanlist(X, + no2list, + weights = w, scale = c(TRUE, FALSE) + )))) + MFX <- list(structure(fmean, x = orthonormalize(demeanlist(X, + no1list, + weights = w, scale = c(TRUE, FALSE) + )))) invfun <- function(v) { - Crowsum(ww*demeanlist(demeanlist(ww*v[f2,],MDX), no2list, - weights=w, scale=FALSE), f2) + crowsum(ww * demeanlist(demeanlist(ww * v[f2, ], MDX), no2list, + weights = w, scale = FALSE + ), f2) } - if(length(restf) > 0) { - MX <- list(structure(fmean,x=orthonormalize(demeanlist(X, - restf,weights=w, scale=c(TRUE,FALSE))))) - MXfun <- function(v) ww*demeanlist(demeanlist(ww*v[f1,], MX), restf, weights=w, - scale=FALSE) + if (length(restf) > 0) { + MX <- list(structure(fmean, x = orthonormalize(demeanlist(X, + restf, + weights = w, scale = c(TRUE, FALSE) + )))) + MXfun <- function(v) { + ww * demeanlist(demeanlist(ww * v[f1, ], MX), restf, + weights = w, + scale = FALSE + ) + } } else { - MX <- list(structure(fmean, x=orthonormalize(ww*X))) - MXfun <- function(v) ww*demeanlist(ww*v[f1,], MX) + MX <- list(structure(fmean, x = orthonormalize(ww * X))) + MXfun <- function(v) ww * demeanlist(ww * v[f1, ], MX) } } else { invfun <- function(v) { - Crowsum(ww2*demeanlist(v[f2,], no2list, weights=w), f2) + crowsum(ww2 * demeanlist(v[f2, ], no2list, weights = w), f2) } - MXfun <- function(v) ww2*demeanlist(v[f1,], restf, weights=w) + MXfun <- function(v) ww2 * demeanlist(v[f1, ], restf, weights = w) } invfunX <- function(v) { - Crowsum(MXfun(v), f1) + 
crowsum(MXfun(v), f1) } - - if(robust) { + + if (robust) { # residuals present, do cluster robust correction # create all the cluster interactions, so we don't have to do # it each time in the iteration tolmod <- 1 docluster <- !is.null(est$clustervar) - if(docluster) { + if (docluster) { tolmod <- 0 d <- length(est$clustervar) cia <- list() - for(i in 1:(2^d-1)) { + for (i in 1:(2^d - 1)) { # Find out which ones to interact iac <- as.logical(intToBits(i))[1:d] # interact the factors - cia[[i]] <- factor(do.call(paste,c(est$clustervar[iac],sep='\004'))) + cia[[i]] <- factor(do.call(paste, c(est$clustervar[iac], sep = "\004"))) tolmod <- tolmod + nlevels(cia[[i]]) } } - toladj <- sqrt(sum((ww*resid)^2)) - wwres <- ww*resid - trfun <- function(x,trtol) { + toladj <- sqrt(sum((ww * resid)^2)) + wwres <- ww * resid + trfun <- function(x, trtol) { # return crude estimate of the trace - if(trtol == 0) return(abs(nlev1-nlev2)) -# on.exit({rm(list=ls()); gc()}) + if (trtol == 0) { + return(abs(nlev1 - nlev2)) + } + # on.exit({rm(list=ls()); gc()}) # since we square our result, we should use sqrt(trtol)/2 as a tolerance - # trtol is the absolute tolerance, but halftrace is squared, we should + # trtol is the absolute tolerance, but halftrace is squared, we should # really use a relative tolerance, we use the square root of the tracetol -# message('cov trtol=',trtol, ' tolmod ',tolmod, ' N ',N) - Lx <- halftrace(x, f1, no1list, MFX, -sqrt(trtol/toladj), lmean, name1, weights=w) - Rx <- halftrace(x, f2, no2list, MDX, -sqrt(trtol/toladj), lmean, name2, weights=w) -# Rx <- halftrace(x, f, restf, MFX, sqrt(trtol)/2, lmean, name, weights=w) -# Rx <- halftrace(x, f, restf, MFX, trtol, lmean, name, weights=w) + # message('cov trtol=',trtol, ' tolmod ',tolmod, ' N ',N) + Lx <- halftrace(x, f1, no1list, MFX, -sqrt(trtol / toladj), lmean, name1, weights = w) + Rx <- halftrace(x, f2, no2list, MDX, -sqrt(trtol / toladj), lmean, name2, weights = w) + # Rx <- halftrace(x, f, restf, MFX, 
sqrt(trtol)/2, lmean, name, weights=w) + # Rx <- halftrace(x, f, restf, MFX, trtol, lmean, name, weights=w) # now apply cluster stuff # first, scale with (weighted) residuals .Call(C_scalecols, Rx, wwres) .Call(C_scalecols, Lx, wwres) - if(!docluster) { + if (!docluster) { # It's heteroscedastic return(colSums(Lx * Rx)) } else { # it's one or more clusters, do the Cameron et al detour - result <- vector('numeric',ncol(Rx)) - for(i in 1:(2^d-1)) { - Lb <- Crowsum(Lx, cia[[i]]) - Rb <- Crowsum(Rx, cia[[i]]) + result <- vector("numeric", ncol(Rx)) + for (i in 1:(2^d - 1)) { + Lb <- crowsum(Lx, cia[[i]]) + Rb <- crowsum(Rx, cia[[i]]) # odd number is positive, even is negative - sgn <- 2*(sum(as.logical(intToBits(i))[1:d]) %% 2) - 1 + sgn <- 2 * (sum(as.logical(intToBits(i))[1:d]) %% 2) - 1 result <- result + sgn * colSums(Lb * Rb) } return(result) } } - epsvar <- 1 # now incorporated in the robust variance matrix, so don't scale + epsvar <- 1 # now incorporated in the robust variance matrix, so don't scale } else { - trfun <- function(x,trtol) { + trfun <- function(x, trtol) { # return crude estimate of the trace - if(trtol == 0) return(-abs(nlev1-nlev2)) -# on.exit({rm(list=ls()); gc()}) - M1x <- ww*demeanlist(unnamed(x),lmean,weights=w,scale=FALSE) - DtM1x <- Crowsum(M1x,f1) - FtM1x <- Crowsum(M1x,f2) + if (trtol == 0) { + return(-abs(nlev1 - nlev2)) + } + # on.exit({rm(list=ls()); gc()}) + M1x <- ww * demeanlist(unnamed(x), lmean, weights = w, scale = FALSE) + DtM1x <- crowsum(M1x, f1) + FtM1x <- crowsum(M1x, f2) d1 <- colSums(DtM1x^2) d2 <- colSums(FtM1x^2) - v <- cgsolve(invfunX, DtM1x, eps=-trtol/sqrt(d1+d2)/3, name=name) - MXv <- Crowsum(MXfun(v), f2) - sol <- cgsolve(invfun, FtM1x, eps=-trtol/sqrt(colSums(MXv^2))/3, name=name) - -colSums(sol* MXv) + v <- cgsolve(invfunX, DtM1x, eps = -trtol / sqrt(d1 + d2) / 3, name = name) + MXv <- crowsum(MXfun(v), f2) + sol <- cgsolve(invfun, FtM1x, eps = -trtol / sqrt(colSums(MXv^2)) / 3, name = name) + -colSums(sol * MXv) 
} } # our function does the inner product, not just matrix application. Signal to mctrace. - attr(trfun,'IP') <- TRUE + attr(trfun, "IP") <- TRUE epsvar <- epsvar * wc # absolute precision, scale by N and epsvar since it's trace level precision - eps <- -abs(tol)*N/epsvar + eps <- -abs(tol) * N / epsvar - epsvar*mctrace(trfun, N=N, tol=eps, trname=name, - maxsamples=maxsamples)/N + epsvar * mctrace(trfun, + N = N, tol = eps, trname = name, + maxsamples = maxsamples + ) / N } # compute variance of the biased variance estimate @@ -559,58 +596,63 @@ covbias <- function(index,est,tol=0.01, maxsamples=Inf, resid, weights=NULL, # var(x^t A x) = 2 tr(AVAV) + 4mu^t*AVA*mu # where V is the variance matrix of x, assumed to be sigma^2 I, and mu is the # expectation of x (i.e. Dtheta). -varvar <- function(index, fe, X, pointest, resvar, tol=0.01, - biascorrect=FALSE, weights=NULL) { +varvar <- function(index, fe, X, pointest, resvar, tol = 0.01, + biascorrect = FALSE, weights = NULL) { w <- weights -# w <- NULL - if(is.null(w)) { + # w <- NULL + if (is.null(w)) { wc <- ww <- ww2 <- 1.0 } else { - w <- w/sqrt(sum(w^2)) - ww2 <- w^2; ww <- w - wc <- sum(w^2)/(sum(w^2)^2 - sum(w^4)) + w <- w / sqrt(sum(w^2)) + ww2 <- w^2 + ww <- w + wc <- sum(w^2) / (sum(w^2)^2 - sum(w^4)) } - if(!is.null(w) && biascorrect) warning('bias corrected varvars with weights not tested') + if (!is.null(w) && biascorrect) warning("bias corrected varvars with weights not tested") f <- fe[[index]] N <- length(f) - lmean <- list(factor(rep(1,N))) - name <- paste('varvar(',names(fe)[[index]],')', sep='') - if(length(X)==0) { - MFX <- fe[-index] - invfun <- function(x) { - Crowsum(ww2*demeanlist(x[f,], MFX, weights=w),f) - } + lmean <- list(factor(rep(1, N))) + name <- paste("varvar(", names(fe)[[index]], ")", sep = "") + if (length(X) == 0) { + MFX <- fe[-index] + invfun <- function(x) { + crowsum(ww2 * demeanlist(x[f, ], MFX, weights = w), f) + } } else { -# M_{F,X} = M_F M_{M_F X} + # M_{F,X} = M_F M_{M_F 
X} restf <- fe[-index] - MFX <- list(structure(factor(rep(1,N)), - x=orthonormalize(ww*demeanlist(X,restf,weights=w)))) + MFX <- list(structure(factor(rep(1, N)), + x = orthonormalize(ww * demeanlist(X, restf, weights = w)) + )) invfun <- function(x) { - Crowsum(ww2*demeanlist(demeanlist(ww*x[f,],MFX)/ww, restf, weights=w), f) + crowsum(ww2 * demeanlist(demeanlist(ww * x[f, ], MFX) / ww, restf, weights = w), f) } } Dtheta <- pointest[f] - DtM1D <- Crowsum(ww2*demeanlist(Dtheta,lmean,weights=w), f) - v <- cgsolve(invfun, DtM1D, eps=tol/4/resvar/sqrt(sum(DtM1D^2)), name=name) - meanpart <- 4*resvar * sum(DtM1D * v) - if(!biascorrect) return(meanpart/N^2) + DtM1D <- crowsum(ww2 * demeanlist(Dtheta, lmean, weights = w), f) + v <- cgsolve(invfun, DtM1D, eps = tol / 4 / resvar / sqrt(sum(DtM1D^2)), name = name) + meanpart <- 4 * resvar * sum(DtM1D * v) + if (!biascorrect) { + return(meanpart / N^2) + } # the mean part is biased upwards. We should correct it. # it turns out that we can do this by changing the sign of the # trace term, the bias is the same expression as the trace part # message('mean part=',meanpart/N^2) - mytol <- meanpart/10 - trfun <- function(x,trtol) { - v <- ww*demeanlist(cgsolve(invfun, Crowsum(ww2*demeanlist(x,lmean,weights=w),f), - eps=-mytol^2/resvar^2/2,name=name)[f,],lmean,weights=w) + mytol <- meanpart / 10 + trfun <- function(x, trtol) { + v <- ww * demeanlist(cgsolve(invfun, crowsum(ww2 * demeanlist(x, lmean, weights = w), f), + eps = -mytol^2 / resvar^2 / 2, name = name + )[f, ], lmean, weights = w) colSums(v * x) } - attr(trfun,'IP') = TRUE - trpart <- 2*resvar^2 * mctrace(trfun, N=length(f), trname=name, tol=-mytol/resvar/2) - if(!is.null(w)) trpart <- trpart/N -# message('mean part=', meanpart, ' trpart=',trpart) - (meanpart-trpart)/N^2 + attr(trfun, "IP") <- TRUE + trpart <- 2 * resvar^2 * mctrace(trfun, N = length(f), trname = name, tol = -mytol / resvar / 2) + if (!is.null(w)) trpart <- trpart / N + # message('mean part=', meanpart, ' 
trpart=',trpart) + (meanpart - trpart) / N^2 } @@ -622,104 +664,105 @@ varvar <- function(index, fe, X, pointest, resvar, tol=0.01, #' With a model like \eqn{y = X\beta + D\theta + F\psi + \epsilon}, where \eqn{D} and #' \eqn{F} are matrices with dummy encoded factors, one application of \pkg{lfe} is #' to study the variances \eqn{var(D\theta)}, \eqn{var(F\psi)} and covariances -#' \eqn{cov(D\theta, F\psi)}. The function \code{\link{fevcov}} computes bias corrected +#' \eqn{cov(D\theta, F\psi)}. The function [fevcov()] computes bias corrected #' variances and covariances. However, these variance estimates are still -#' random variables for which \code{\link{fevcov}} only estimate the -#' expectation. The function \code{varvars} estimates the variance of these +#' random variables for which [fevcov()] only estimate the +#' expectation. The function `varvars` estimates the variance of these #' estimates. -#' +#' #' This function returns valid results only for normally distributed residuals. #' Note that the estimates for the fixed effect variances from -#' \code{\link{fevcov}} are not normally distributed, but a sum of chi-square +#' [fevcov()] are not normally distributed, but a sum of chi-square #' distributions which depends on the eigenvalues of certain large matrices. We -#' do not compute that distribution. The variances returned by \code{varvars} -#' can therefore \emph{not} be used directly to estimate confidence intervals, +#' do not compute that distribution. The variances returned by `varvars` +#' can therefore *not* be used directly to estimate confidence intervals, #' other than through coarse methods like the Chebyshev inequality. These #' estimates only serve as a rough guideline as to how wrong the variance -#' estimates from \code{\link{fevcov}} might be. -#' +#' estimates from [fevcov()] might be. +#' #' Like the fixed effect variances themselves, their variances are also biased #' upwards. 
Correcting this bias can be costly, and is therefore by default
 #' switched off.
-#'
+#'
 #' The variances tend to zero with increasing number of observations. Thus, for
 #' large datasets they will be quite small.
-#'
+#'
 #' @param est an object of class '"felm"', the result of a call to
-#' \code{\link{felm}(keepX=TRUE)}.
-#' @param alpha a data frame, the result of a call to \code{\link{getfe}}.
+#' [felm()] with `keepX=TRUE`.
+#' @param alpha a data frame, the result of a call to [getfe()].
 #' @param tol numeric. The absolute tolerance for the bias-corrected
 #' correlation.
 #' @param biascorrect logical. Should the estimates be bias corrected?
 #' @param lhs character. Name of left hand side if multiple left hand sides.
-#' @return \code{varvars} returns a vector with a variance estimate for each
+#' @return `varvars` returns a vector with a variance estimate for each
 #' fixed effect variance. I.e. for the diagonal returned by
-#' \code{\link{fevcov}}.
-#' @note The \code{tol} argument specifies the tolerance as in
-#' \code{\link{fevcov}}. Note that if \code{est} is the result of a call to
-#' \code{\link{felm}} with \code{keepX=FALSE} (the default), the variances will
+#' [fevcov()].
+#' @note The `tol` argument specifies the tolerance as in
+#' [fevcov()]. Note that if `est` is the result of a call to
+#' [felm()] with `keepX=FALSE` (the default), the variances will
 #' be estimated as if the covariates X are independent of the factors. There
 #' is currently no function available for estimating the variance of the
-#' covariance estimates from \code{\link{fevcov}}.
-#'
+#' covariance estimates from [fevcov()].
+#'
 #' The cited paper does not contain the expressions for the variances computed
-#' by \code{varvars} (there's a 10 page limit in that journal), though they can
+#' by `varvars` (there's a 10 page limit in that journal), though they can
 #' be derived in the same fashion as in the paper, with the formula for the
 #' variance of a quadratic form.
-#' @seealso \code{\link{bccorr}} \code{\link{fevcov}} +#' @seealso [bccorr()] [fevcov()] #' @references Gaure, S. (2014), \cite{Correlation bias correction in two-way #' fixed-effects linear regression}, Stat 3(1):379-390, 2014. #' @examples -#' +#' #' x <- rnorm(500) #' x2 <- rnorm(length(x)) -#' +#' #' ## create individual and firm -#' id <- factor(sample(40,length(x),replace=TRUE)) -#' firm <- factor(sample(30,length(x),replace=TRUE,prob=c(2,rep(1,29)))) -#' foo <- factor(sample(20,length(x),replace=TRUE)) +#' id <- factor(sample(40, length(x), replace = TRUE)) +#' firm <- factor(sample(30, length(x), replace = TRUE, prob = c(2, rep(1, 29)))) +#' foo <- factor(sample(20, length(x), replace = TRUE)) #' ## effects #' id.eff <- rnorm(nlevels(id)) #' firm.eff <- rnorm(nlevels(firm)) #' foo.eff <- rnorm(nlevels(foo)) #' ## left hand side #' id.m <- id.eff[id] -#' firm.m <- 2*firm.eff[firm] -#' foo.m <- 3*foo.eff[foo] -#' y <- x + 0.25*x2 + id.m + firm.m + foo.m + rnorm(length(x)) -#' +#' firm.m <- 2 * firm.eff[firm] +#' foo.m <- 3 * foo.eff[foo] +#' y <- x + 0.25 * x2 + id.m + firm.m + foo.m + rnorm(length(x)) +#' #' # make a data frame -#' fr <- data.frame(y,x,x2,id,firm,foo) +#' fr <- data.frame(y, x, x2, id, firm, foo) #' ## estimate and print result -#' est <- felm(y ~ x+x2|id+firm+foo, data=fr, keepX=TRUE) +#' est <- felm(y ~ x + x2 | id + firm + foo, data = fr, keepX = TRUE) #' alpha <- getfe(est) #' # estimate the covariance matrix of the fixed effects #' fevcov(est, alpha) #' # estimate variances of the diagonal #' varvars(est, alpha) -#' +#' #' @export varvars -varvars <- function(est, alpha=getfe(est), tol=0.01, biascorrect=FALSE, lhs=NULL) { - if(nlevels(est$cfactor) > 1) stop('Data should have just a single connected component') +varvars <- function(est, alpha = getfe(est), tol = 0.01, biascorrect = FALSE, lhs = NULL) { + if (nlevels(est$cfactor) > 1) stop("Data should have just a single connected component") fe <- est$fe e <- length(fe) - if(length(tol) 
== 1) tol <- rep(tol,e) - - if(length(est$lhs) > 1 && is.null(lhs)) - stop('Please specify lhs=[one of ',paste(est$lhs, collapse=','),']') - if(length(est$lhs) == 1) lhs <- est$lhs - effnam <- 'effect' - if(! ('effect' %in% colnames(alpha))) { - effnam <- paste('effect',lhs,sep='.') - if(!(effnam %in% colnames(alpha))) { + if (length(tol) == 1) tol <- rep(tol, e) + + if (length(est$lhs) > 1 && is.null(lhs)) { + stop("Please specify lhs=[one of ", paste(est$lhs, collapse = ","), "]") + } + if (length(est$lhs) == 1) lhs <- est$lhs + effnam <- "effect" + if (!("effect" %in% colnames(alpha))) { + effnam <- paste("effect", lhs, sep = ".") + if (!(effnam %in% colnames(alpha))) { stop("Can't find effect-column in alpha") } } - effs <- lapply(names(fe), function(nm) alpha[alpha[,'fe']==nm, effnam]) - w2 <- if(is.null(est$weights)) 1 else est$weights^2 - resvar <- sum(w2*est$residuals[,lhs]^2)*length(w2)/est$df + effs <- lapply(names(fe), function(nm) alpha[alpha[, "fe"] == nm, effnam]) + w2 <- if (is.null(est$weights)) 1 else est$weights^2 + resvar <- sum(w2 * est$residuals[, lhs]^2) * length(w2) / est$df sapply(1:e, function(index) { varvar(index, fe, est$X, effs[[index]], resvar, tol[index], biascorrect, est$weights) @@ -738,40 +781,40 @@ varvars <- function(est, alpha=getfe(est), tol=0.01, biascorrect=FALSE, lhs=NULL #' Compute limited mobility bias corrected covariance matrix between fixed #' effects -#' +#' #' With a model like \eqn{y = X\beta + D\theta + F\psi + \epsilon}, where #' \eqn{D} and \eqn{F} are matrices with dummy encoded factors, one application #' of \pkg{lfe} is to study the variances \eqn{var(D\theta)}, \eqn{var(F\psi)} #' and covariances \eqn{cov(D\theta, F\psi)}. However, if we use estimates for #' \eqn{\theta} and \eqn{\psi}, the resulting variances are biased. The -#' function \code{fevcov} computes a bias corrected covariance matrix as +#' function `fevcov` computes a bias corrected covariance matrix as #' described in \cite{Gaure (2014)}. 
-#' -#' The \code{tol} argument specifies the tolerance. The tolerance is relative +#' +#' The `tol` argument specifies the tolerance. The tolerance is relative #' for the variances, i.e. the diagonal of the output. For the covariances, #' the tolerance is relative to the square root of the product of the #' variances, i.e. an absolute tolerance for the correlation. If a numeric of -#' length 1, \code{tol} specifies the same tolerance for all -#' variances/covariances. If it is of length 2, \code{tol[1]} specifies the -#' variance tolerance, and \code{tol[2]} the covariance tolerance. \code{tol} -#' can also be a square matrix of size \code{length(est$fe)}, in which case the +#' length 1, `tol` specifies the same tolerance for all +#' variances/covariances. If it is of length 2, `tol[1]` specifies the +#' variance tolerance, and `tol[2]` the covariance tolerance. `tol` +#' can also be a square matrix of size `length(est$fe)`, in which case the #' tolerance for each variance and covariance is specified individually. -#' +#' #' The function performs no checks for estimability. If the fixed effects are -#' not estimable, the result of a call to \code{fevcov} is not useable. +#' not estimable, the result of a call to `fevcov` is not useable. #' Moreover, there should be just a single connected component among the fixed #' effects. -#' -#' \code{alpha} must contain a full set of coefficients, and contain columns -#' \code{'fe'} and \code{'effect'} like the default estimable functions from -#' \code{\link{efactory}}. -#' -#' In the case that the \code{\link{felm}}-estimation has weights, it is the +#' +#' `alpha` must contain a full set of coefficients, and contain columns +#' `'fe'` and `'effect'` like the default estimable functions from +#' [efactory()]. +#' +#' In the case that the [felm()]-estimation has weights, it is the #' weighted variances and covariance which are bias corrected. 
-#'
+#'
 #' @param est an object of class '"felm"', the result of a call to
-#' \code{\link{felm}(keepX=TRUE)}.
-#' @param alpha a data frame, the result of a call to \code{\link{getfe}}.
+#' [felm()] with `keepX=TRUE`.
+#' @param alpha a data frame, the result of a call to [getfe()].
 #' @param tol numeric. The absolute tolerance for the bias-corrected
 #' correlation.
 #' @param robust logical. Should robust (heteroskedastic or cluster) residuals
@@ -779,138 +822,149 @@ varvars <- function(est, alpha=getfe(est), tol=0.01, biascorrect=FALSE, lhs=NULL
 #' @param maxsamples integer. Maximum number of samples for expectation
 #' estimates.
 #' @param lhs character. Name of left hand side if multiple left hand sides.
-#' @return \code{fevcov} returns a square matrix with the bias corrected
-#' covariances. An attribute \code{'bias'} contains the biases. The bias
+#' @return `fevcov` returns a square matrix with the bias corrected
+#' covariances. An attribute `'bias'` contains the biases. The bias
 #' corrections have been subtracted from the bias estimates. I.e. vc = vc' -
 #' b, where vc' is the biased variance and b is the bias.
 #' @note Bias correction for IV-estimates are not supported as of now.
-#'
-#' Note that if \code{est} is the result of a call to \code{\link{felm}} with
-#' \code{keepX=FALSE} (the default), the biases will be computed as if the
+#'
+#' Note that if `est` is the result of a call to [felm()] with
+#' `keepX=FALSE` (the default), the biases will be computed as if the
 #' covariates X are independent of the factors. This will be faster (typically
 #' by a factor of approx. 4), and possibly wronger. Note also that the
 #' computations performed by this function are non-trivial, they may take quite
 #' some time. It would be wise to start out with quite liberal tolerances,
 #' e.g. \cite{tol=0.1}, to get an idea of the time requirements.
-#' -#' If there are only two fixed effects, \code{fevcov} returns the same -#' information as \code{\link{bccorr}}, though in a slightly different format. -#' @seealso \code{\link{varvars}} \code{\link{bccorr}} +#' +#' If there are only two fixed effects, `fevcov` returns the same +#' information as [bccorr()], though in a slightly different format. +#' @seealso [varvars()] [bccorr()] #' @references Gaure, S. (2014), \cite{Correlation bias correction in two-way #' fixed-effects linear regression}, Stat 3(1):379-390, 2014. -#' \url{http://dx.doi.org/10.1002/sta4.68} +#' \doi{10.1002/sta4.68} #' @examples -#' +#' #' x <- rnorm(5000) #' x2 <- rnorm(length(x)) -#' +#' #' ## create individual and firm -#' id <- factor(sample(40,length(x),replace=TRUE)) -#' firm <- factor(sample(30,length(x),replace=TRUE,prob=c(2,rep(1,29)))) -#' foo <- factor(sample(20,length(x),replace=TRUE)) +#' id <- factor(sample(40, length(x), replace = TRUE)) +#' firm <- factor(sample(30, length(x), replace = TRUE, prob = c(2, rep(1, 29)))) +#' foo <- factor(sample(20, length(x), replace = TRUE)) #' ## effects #' id.eff <- rnorm(nlevels(id)) #' firm.eff <- runif(nlevels(firm)) -#' foo.eff <- rchisq(nlevels(foo),df=1) +#' foo.eff <- rchisq(nlevels(foo), df = 1) #' ## left hand side #' id.m <- id.eff[id] #' firm.m <- firm.eff[firm] #' foo.m <- foo.eff[foo] #' # normalize them -#' id.m <- id.m/sd(id.m) -#' firm.m <- firm.m/sd(firm.m) -#' foo.m <- foo.m/sd(foo.m) -#' y <- x + 0.25*x2 + id.m + firm.m + foo.m + rnorm(length(x),sd=2) -#' z <- x + 0.5*x2 + 0.7*id.m + 0.5*firm.m + 0.3*foo.m + rnorm(length(x),sd=2) +#' id.m <- id.m / sd(id.m) +#' firm.m <- firm.m / sd(firm.m) +#' foo.m <- foo.m / sd(foo.m) +#' y <- x + 0.25 * x2 + id.m + firm.m + foo.m + rnorm(length(x), sd = 2) +#' z <- x + 0.5 * x2 + 0.7 * id.m + 0.5 * firm.m + 0.3 * foo.m + rnorm(length(x), sd = 2) #' # make a data frame -#' fr <- data.frame(y,z,x,x2,id,firm,foo) +#' fr <- data.frame(y, z, x, x2, id, firm, foo) #' ## estimate and print 
result -#' est <- felm(y|z ~ x+x2|id+firm+foo, data=fr, keepX=TRUE) +#' est <- felm(y | z ~ x + x2 | id + firm + foo, data = fr, keepX = TRUE) #' # find bias corrections, there's little bias in this example -#' print(yv <- fevcov(est, lhs='y')) +#' print(yv <- fevcov(est, lhs = "y")) #' ## Here's how to compute the unbiased correlation matrix: #' cm <- cov2cor(yv) -#' structure(cm,bias=NULL) -#' +#' structure(cm, bias = NULL) +#' #' @export fevcov -fevcov <- function(est, alpha=getfe(est), tol=0.01, robust=!is.null(est$clustervar), - maxsamples=Inf, lhs=NULL) { - if(nlevels(est$cfactor) > 1) stop('Data should have just a single connected component') - iaf <- sapply(est$fe, function(f) !is.null(attr(f,'x',exact=TRUE))) - if(any(iaf)) - stop("Bias correction for interacted factor ",paste(names(est$fe)[iaf],collapse=', '), " not supported") - if(length(est$lhs) > 1 && is.null(lhs)) - stop('Please specify lhs=[one of ',paste(est$lhs, collapse=','),']') - if(length(est$lhs) == 1) lhs <- est$lhs - if(is.null(lhs)) lhs <- 1 - effnam <- 'effect' - if(! 
('effect' %in% colnames(alpha))) { - effnam <- paste('effect',lhs,sep='.') - if(!(effnam %in% colnames(alpha))) { +fevcov <- function(est, alpha = getfe(est), tol = 0.01, robust = !is.null(est$clustervar), + maxsamples = Inf, lhs = NULL) { + if (nlevels(est$cfactor) > 1) stop("Data should have just a single connected component") + iaf <- sapply(est$fe, function(f) !is.null(attr(f, "x", exact = TRUE))) + if (any(iaf)) { + stop("Bias correction for interacted factor ", paste(names(est$fe)[iaf], collapse = ", "), " not supported") + } + if (length(est$lhs) > 1 && is.null(lhs)) { + stop("Please specify lhs=[one of ", paste(est$lhs, collapse = ","), "]") + } + if (length(est$lhs) == 1) lhs <- est$lhs + if (is.null(lhs)) lhs <- 1 + effnam <- "effect" + if (!("effect" %in% colnames(alpha))) { + effnam <- paste("effect", lhs, sep = ".") + if (!(effnam %in% colnames(alpha))) { stop("Can't find effect-column in alpha") } } - if(is.na(match('fe', colnames(alpha)))) - stop("alpha should contain columns 'fe' and 'effect'") + if (is.na(match("fe", colnames(alpha)))) { + stop("alpha should contain columns 'fe' and 'effect'") + } -# if(!is.null(est$weights)) warning("Bias correction with weights not yet fully correct") + # if(!is.null(est$weights)) warning("Bias correction with weights not yet fully correct") - if(length(tol) == 1) tol <- c(tol,-abs(tol)) - if(length(tol) != 2 && !is.matrix(tol)) - stop('tol must be either a matrix or of length 1 or 2') + if (length(tol) == 1) tol <- c(tol, -abs(tol)) + if (length(tol) != 2 && !is.matrix(tol)) { + stop("tol must be either a matrix or of length 1 or 2") + } K <- length(est$fe) - if(is.matrix(tol) && (ncol(tol) != K || nrow(tol) != K)) - stop('tol matrix must be square, with size the number of fixed effects: ',K) - if(!is.matrix(tol)) { + if (is.matrix(tol) && (ncol(tol) != K || nrow(tol) != K)) { + stop("tol matrix must be square, with size the number of fixed effects: ", K) + } + if (!is.matrix(tol)) { tmp <- tol - tol <- 
matrix(0,K,K) + tol <- matrix(0, K, K) diag(tol) <- tmp[1] tol[col(tol) != row(tol)] <- tmp[2] } # compute the biased variances fe <- est$fe - effs <- lapply(names(fe), function(nm) alpha[alpha[,'fe']==nm, effnam][fe[[nm]]]) + effs <- lapply(names(fe), function(nm) alpha[alpha[, "fe"] == nm, effnam][fe[[nm]]]) names(effs) <- names(fe) - bvcv <- matrix(0,K,K) + bvcv <- matrix(0, K, K) colnames(bvcv) <- rownames(bvcv) <- names(fe) diag(bvcv) <- sapply(effs, var) - for(i in 1:K) { - for(j in i:K) { - if(is.null(est$weights)) { - bvcv[i,j] <- bvcv[j,i] <- cov(effs[[i]],effs[[j]]) + for (i in 1:K) { + for (j in i:K) { + if (is.null(est$weights)) { + bvcv[i, j] <- bvcv[j, i] <- cov(effs[[i]], effs[[j]]) } else { - bvcv[i,j] <- bvcv[j,i] <- wcov(effs[[i]], effs[[j]], est$weights^2) + bvcv[i, j] <- bvcv[j, i] <- wcov(effs[[i]], effs[[j]], est$weights^2) } } } # compute the variances - bias <- matrix(0,K,K) + bias <- matrix(0, K, K) colnames(bias) <- rownames(bias) <- names(fe) - resid <- est$residuals[,lhs] - cgtf <- rep(1,K) - diag(bias) <- sapply(1:K, function(i) varbias(i, est, tol[i,i], bvcv[i,i], - maxsamples, resid=resid,weights=est$weights, - dfadj=(est$N-1)/est$df)) - + resid <- est$residuals[, lhs] + cgtf <- rep(1, K) + diag(bias) <- sapply(1:K, function(i) { + varbias(i, est, tol[i, i], bvcv[i, i], + maxsamples, + resid = resid, weights = est$weights, + dfadj = (est$N - 1) / est$df + ) + }) + wtf <- diag(bias) > diag(bvcv) # update off-diagonal tolerances by the variances offdiag <- col(tol) != row(tol) - if(any(wtf)) { - message('Some variance biases are larger than the variances. Setting them equal:') - print(cbind(variance=diag(bvcv),bias=diag(bias))) - diag(bias)[wtf] <- 0.999*diag(bvcv)[wtf] - tol[offdiag] <- -(abs(tol)*sqrt(abs(tcrossprod(diag(bvcv)-0.9*diag(bias)))))[offdiag] + if (any(wtf)) { + message("Some variance biases are larger than the variances. 
Setting them equal:") + print(cbind(variance = diag(bvcv), bias = diag(bias))) + diag(bias)[wtf] <- 0.999 * diag(bvcv)[wtf] + tol[offdiag] <- -(abs(tol) * sqrt(abs(tcrossprod(diag(bvcv) - 0.9 * diag(bias)))))[offdiag] } else { - tol[offdiag] <- -(abs(tol)*sqrt(abs(tcrossprod(diag(bvcv)-diag(bias)))))[offdiag] + tol[offdiag] <- -(abs(tol) * sqrt(abs(tcrossprod(diag(bvcv) - diag(bias)))))[offdiag] } # compute the covariances - if(K > 1) { - for(i in 1:(K-1)) { - for(j in (i+1):K) - bias[i,j] <- bias[j,i] <- covbias(c(i,j),est,tol[i,j],maxsamples, - resid=resid, weights=est$weights) - } - } - structure(bvcv-bias, bias=bias) + if (K > 1) { + for (i in 1:(K - 1)) { + for (j in (i + 1):K) { + bias[i, j] <- bias[j, i] <- covbias(c(i, j), est, tol[i, j], maxsamples, + resid = resid, weights = est$weights + ) + } + } + } + structure(bvcv - bias, bias = bias) } diff --git a/R/btrap.R b/R/btrap.R index e361bc0..fd09d28 100644 --- a/R/btrap.R +++ b/R/btrap.R @@ -1,81 +1,82 @@ #' Bootstrap standard errors for the group fixed effects -#' +#' #' Bootstrap standard errors for the group fixed effects which were swept out -#' during an estimation with \code{\link{felm}}. -#' -#' The bootstrapping is done in parallel if \code{threads > 1}. -#' \code{\link{btrap}} is run automatically from \code{\link{getfe}} if -#' \code{se=TRUE} is specified. To save some overhead, the individual +#' during an estimation with [felm()]. +#' +#' The bootstrapping is done in parallel if `threads > 1`. +#' [btrap()] is run automatically from [getfe()] if +#' `se=TRUE` is specified. To save some overhead, the individual #' iterations are grouped together, the memory available for this grouping is -#' fetched with \code{getOption('lfe.bootmem')}, which is initialized upon -#' loading of \pkg{lfe} to \code{options(lfe.bootmem=500)} (MB). -#' -#' If \code{robust=TRUE}, heteroskedastic robust standard errors are estimated. 
-#' If \code{robust=FALSE} and \code{cluster=TRUE}, clustered standard errors
-#' with the cluster specified to \code{felm()} are estimated. If \code{cluster}
-#' is a factor, it is used for the cluster definition. \code{cluster may} also
+#' fetched with `getOption('lfe.bootmem')`, which is initialized upon
+#' loading of \pkg{lfe} to `options(lfe.bootmem=500)` (MB).
+#'
+#' If `robust=TRUE`, heteroskedastic robust standard errors are estimated.
+#' If `robust=FALSE` and `cluster=TRUE`, clustered standard errors
+#' with the cluster specified to `felm()` are estimated. If `cluster`
+#' is a factor, it is used for the cluster definition. `cluster` may also
 #' be a list of factors.
-#'
-#' @param alpha data frame returned from \code{\link{getfe}}
-#' @param obj object of class \code{"felm"}, usually, a result of a call to
-#' \code{\link{felm}}
+#'
+#' @param alpha data frame returned from [getfe()]
+#' @param obj object of class `"felm"`, usually, a result of a call to
+#' [felm()]
 #' @param N integer. The number of bootstrap iterations
-#' @param ef function. An estimable function such as in \code{\link{getfe}}.
-#' The default is to use the one used on \code{alpha}
+#' @param ef function. An estimable function such as in [getfe()].
+#' The default is to use the one used on `alpha`
 #' @param eps double. Tolerance for centering, as in getfe
 #' @param threads integer. The number of threads to use
 #' @param robust logical. Should heteroskedastic standard errors be estimated?
 #' @param cluster logical or factor. Estimate clustered standard errors.
-#' @param lhs character vector. Specify which left hand side if \code{obj} has
+#' @param lhs character vector. Specify which left hand side if `obj` has
 #' multiple lhs.
 #' @return A data-frame of the same size as alpha is returned, with standard
 #' errors filled in.
#' @examples -#' -#' oldopts <- options(lfe.threads=2) +#' +#' oldopts <- options("lfe.threads") +#' options(lfe.threads = 2) #' ## create covariates #' x <- rnorm(3000) #' x2 <- rnorm(length(x)) -#' +#' #' ## create individual and firm -#' id <- factor(sample(700,length(x),replace=TRUE)) -#' firm <- factor(sample(300,length(x),replace=TRUE)) -#' +#' id <- factor(sample(700, length(x), replace = TRUE)) +#' firm <- factor(sample(300, length(x), replace = TRUE)) +#' #' ## effects #' id.eff <- rlnorm(nlevels(id)) #' firm.eff <- rexp(nlevels(firm)) -#' +#' #' ## left hand side -#' y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) -#' +#' y <- x + 0.25 * x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) +#' #' ## estimate and print result -#' est <- felm(y ~ x+x2 | id + firm) +#' est <- felm(y ~ x + x2 | id + firm) #' summary(est) #' ## extract the group effects #' alpha <- getfe(est) #' head(alpha) #' ## bootstrap standard errors -#' head(btrap(alpha,est)) -#' +#' head(btrap(alpha, est)) +#' #' ## bootstrap some differences -#' ef <- function(v,addnames) { -#' w <- c(v[2]-v[1],v[3]-v[2],v[3]-v[1]) -#' if(addnames) { -#' names(w) <-c('id2-id1','id3-id2','id3-id1') -#' attr(w,'extra') <- list(note=c('line1','line2','line3')) +#' ef <- function(v, addnames) { +#' w <- c(v[2] - v[1], v[3] - v[2], v[3] - v[1]) +#' if (addnames) { +#' names(w) <- c("id2-id1", "id3-id2", "id3-id1") +#' attr(w, "extra") <- list(note = c("line1", "line2", "line3")) #' } #' w #' } #' # check that it's estimable -#' is.estimable(ef,est$fe) -#' -#' head(btrap(alpha,est,ef=ef)) +#' is.estimable(ef, est$fe) +#' +#' head(btrap(alpha, est, ef = ef)) #' options(oldopts) -#' +#' #' @export btrap -btrap <- function(alpha,obj,N=100,ef=NULL,eps=getOption('lfe.eps'), - threads=getOption('lfe.threads'), robust=FALSE, - cluster=NULL, lhs=NULL) { +btrap <- function(alpha, obj, N = 100, ef = NULL, eps = getOption("lfe.eps"), + threads = getOption("lfe.threads"), robust = FALSE, + cluster = 
NULL, lhs = NULL) { # bootstrap the stuff. The name 'btrap' is chosen to instill a feeling of being trapped # (in some long running stuff which will never complete) # bootstrapping is really to draw residuals over again, i.e. to change @@ -83,39 +84,40 @@ btrap <- function(alpha,obj,N=100,ef=NULL,eps=getOption('lfe.eps'), # drawing from the residuals. We need a new r.h.s to replace # the current (I-P)(Y-Xbeta), i.e. (I-P)(Y-Xbeta+delta) where delta is resampled from # the residuals PY-PXbeta. Then we adjust for degrees of freedom, which is component specific. - - if(is.logical(cluster)) { - if(cluster) cluster <- obj$clustervar else cluster <- NULL - } else if(!is.null(cluster)) { - if(is.list(cluster)) - cluster <- lapply(cluster,factor) - else - cluster <- list(factor(cluster)) + + if (is.logical(cluster)) { + if (cluster) cluster <- obj$clustervar else cluster <- NULL + } else if (!is.null(cluster)) { + if (is.list(cluster)) { + cluster <- lapply(cluster, factor) + } else { + cluster <- list(factor(cluster)) + } } - if(is.null(ef)) { + if (is.null(ef)) { # use the one used with alpha - ef <- attr(alpha,'ef') + ef <- attr(alpha, "ef") } else { - if(is.character(ef)) ef <- efactory(obj,opt=ef) + if (is.character(ef)) ef <- efactory(obj, opt = ef) # redo the point estimates - v <- ef(alpha[,'effect'],TRUE) - alpha <- data.frame(effect=v) + v <- ef(alpha[, "effect"], TRUE) + alpha <- data.frame(effect = v) rownames(alpha) <- names(v) - if(!is.null(attr(v,'extra'))) alpha <- cbind(alpha,attr(v,'extra')) + if (!is.null(attr(v, "extra"))) alpha <- cbind(alpha, attr(v, "extra")) } - if(is.null(lhs)) { - R <- obj$r.residuals-obj$residuals + if (is.null(lhs)) { + R <- obj$r.residuals - obj$residuals smpdraw <- as.vector(obj$residuals) } else { - R <- obj$r.residuals[,lhs]-obj$residuals[,lhs] - smpdraw <- as.vector(obj$residuals[,lhs]) + R <- obj$r.residuals[, lhs] - obj$residuals[, lhs] + smpdraw <- as.vector(obj$residuals[, lhs]) } w <- obj$weights # if there are 
weights, smpdraw should be weighted - if(!is.null(w)) smpdraw <- smpdraw * w + if (!is.null(w)) smpdraw <- smpdraw * w # Now, we want to do everything in parallel, so we should allocate up a set # of vectors, but we don't want to blow the memory. Stick to allocating two @@ -124,61 +126,62 @@ btrap <- function(alpha,obj,N=100,ef=NULL,eps=getOption('lfe.eps'), # hmm, up to 500 MB of vectors, we say, but no less than two per thread # (one per thread is bad for balance, if time to completion varies) # divide by two because we use a copy in the demeanlist step. - maxB <- getOption('lfe.bootmem')*1e6/2 - vpt <- max(2,as.integer(min(maxB/(length(R)*8),N)/threads)) - vpb <- vpt*threads + maxB <- getOption("lfe.bootmem") * 1e6 / 2 + vpt <- max(2, as.integer(min(maxB / (length(R) * 8), N) / threads)) + vpb <- vpt * threads blks <- as.integer(ceiling(N / vpb)) - newN <- blks*vpb + newN <- blks * vpb vsum <- 0 vsq <- 0 start <- last <- as.integer(Sys.time()) gc() - if(is.null(lhs)) - predy <- obj$fitted.values - else - predy <- obj$fitted.values[,lhs] + if (is.null(lhs)) { + predy <- obj$fitted.values + } else { + predy <- obj$fitted.values[, lhs] + } - if(!is.null(cluster)) { - # now, what about multiway clustering? + if (!is.null(cluster)) { + # now, what about multiway clustering? # try the Cameron-Gelbach-Miller stuff. 
# make the interacted factors, with sign iac <- list() d <- length(cluster) - for(i in 1:(2^d-1)) { + for (i in 1:(2^d - 1)) { # Find out which ones to interact iab <- as.logical(intToBits(i))[1:d] # odd number is positive, even is negative - sgn <- 2*(sum(iab) %% 2) - 1 - iac[[i]] <- list(sgn=sgn/choose(d,sum(iab)),ib=iab) + sgn <- 2 * (sum(iab) %% 2) - 1 + iac[[i]] <- list(sgn = sgn / choose(d, sum(iab)), ib = iab) } } - X <- model.matrix(obj,centred=NA) - cX <- attr(X,'cX') + X <- model.matrix(obj, centred = NA) + cX <- attr(X, "cX") - for(i in 1:blks) { - if(robust) { + for (i in 1:blks) { + if (robust) { # robust residuals, variance is each squared residual # rsamp <- rnorm(vpb*length(smpdraw))*abs(smpdraw) - rsamp <- lapply(1:vpb,function(i) rnorm(length(smpdraw))*abs(smpdraw)) - } else if(!is.null(cluster)) { + rsamp <- lapply(1:vpb, function(i) rnorm(length(smpdraw)) * abs(smpdraw)) + } else if (!is.null(cluster)) { # Wild bootstrap with Rademacher distribution - rsamp <- lapply(1:vpb, function (i) { - if(length(cluster) == 1) { - smpdraw*sample(c(-1,1),nlevels(cluster[[1]]), replace=TRUE)[cluster[[1]]] + rsamp <- lapply(1:vpb, function(i) { + if (length(cluster) == 1) { + smpdraw * sample(c(-1, 1), nlevels(cluster[[1]]), replace = TRUE)[cluster[[1]]] } else { # draw a Rademacher dist for each single cluster - rad <- lapply(cluster,function(f) sample(c(-1,1),nlevels(f),replace=TRUE)[f]) - Reduce('+',lapply(iac, function(ia) ia$sgn*smpdraw * Reduce('*',rad[ia$ib]))) + rad <- lapply(cluster, function(f) sample(c(-1, 1), nlevels(f), replace = TRUE)[f]) + Reduce("+", lapply(iac, function(ia) ia$sgn * smpdraw * Reduce("*", rad[ia$ib]))) } }) } else { # IID residuals - rsamp <- lapply(1:vpb, function(i) sample(smpdraw, replace=TRUE)) + rsamp <- lapply(1:vpb, function(i) sample(smpdraw, replace = TRUE)) } - if(!is.null(w)) rsamp <- lapply(rsamp, function(x) x/w) - if(length(cX) > 0) { + if (!is.null(w)) rsamp <- lapply(rsamp, function(x) x / w) + if (length(cX) > 
0) { newr <- lapply(rsamp, function(rs) { newy <- predy + rs newbeta <- obj$inv %*% crossprod(cX, newy) @@ -189,29 +192,36 @@ btrap <- function(alpha,obj,N=100,ef=NULL,eps=getOption('lfe.eps'), newr <- lapply(rsamp, function(rs) as.vector(predy) + rs) } rm(rsamp) - v <- kaczmarz(obj$fe, demeanlist(unnamed(newr), obj$fe, eps=eps, threads=threads, - means=TRUE, weights=w), - eps=eps, threads=threads) + v <- kaczmarz(obj$fe, demeanlist(unnamed(newr), obj$fe, + eps = eps, threads = threads, + means = TRUE, weights = w + ), + eps = eps, threads = threads + ) rm(newr) - efv <- lapply(v,ef,addnames=FALSE) - vsum <- vsum + Reduce('+',efv) - vsq <- vsq + Reduce('+',Map(function(i) i^2, efv)) + efv <- lapply(v, ef, addnames = FALSE) + vsum <- vsum + Reduce("+", efv) + vsq <- vsq + Reduce("+", Map(function(i) i^2, efv)) now <- as.integer(Sys.time()) - if(now-last > 300) { - cat('...finished',i*vpb,'of',newN,'vectors in',now-start,'seconds\n') + if (now - last > 300) { + cat("...finished", i * vpb, "of", newN, "vectors in", now - start, "seconds\n") last <- now } } - if(robust) - sename <- 'robustse' - else if(!is.null(cluster)) - sename <- 'clusterse' - else - sename <- 'se' - fSEname <- SEname <- 'se' + if (robust) { + sename <- "robustse" + } else if (!is.null(cluster)) { + sename <- "clusterse" + } else { + sename <- "se" + } + fSEname <- SEname <- "se" fsename <- sename - if(!is.null(lhs)) {fsename <- paste(sename,lhs,sep='.'); fSEname <- paste(SEname,lhs,sep='.');} - alpha[,fsename] <- sqrt(vsq/newN - (vsum/newN)**2)/(1-0.75/newN-7/32/newN**2-9/128/newN**3) - if(sename != 'se') alpha[,fSEname] <- alpha[,fsename] - return(structure(alpha,sename=sename)) + if (!is.null(lhs)) { + fsename <- paste(sename, lhs, sep = ".") + fSEname <- paste(SEname, lhs, sep = ".") + } + alpha[, fsename] <- sqrt(vsq / newN - (vsum / newN)**2) / (1 - 0.75 / newN - 7 / 32 / newN**2 - 9 / 128 / newN**3) + if (sename != "se") alpha[, fSEname] <- alpha[, fsename] + return(structure(alpha, sename = 
sename)) } diff --git a/R/cgsolve.R b/R/cgsolve.R index 45ef665..c8e0db6 100644 --- a/R/cgsolve.R +++ b/R/cgsolve.R @@ -1,20 +1,24 @@ # $Id: cgsolve.R 1990 2016-04-14 13:48:36Z sgaure $ # From "A practical termination criterion for the conjugate gradient method", E.F. Kaasschieter -# BIT 28 (1988), 308-322 (2.6). +# BIT 28 (1988), 308-322 (2.6). # return the vector phi_i(x) for i=1:j phi <- function(j, x, alpha, beta) { - if(j == 1) return(1) + if (j == 1) { + return(1) + } psiprev <- 1 - phiprev <- 1-alpha[1]*x - res <- c(1,phiprev) - if(j == 2) return(res) - - for(i in 3:j) { - psi <- phiprev + beta[i-1]*psiprev - phi <- phiprev - alpha[i]*x*psi - res <- c(res,phi) + phiprev <- 1 - alpha[1] * x + res <- c(1, phiprev) + if (j == 2) { + return(res) + } + + for (i in 3:j) { + psi <- phiprev + beta[i - 1] * psiprev + phi <- phiprev - alpha[i] * x * psi + res <- c(res, phi) phiprev <- phi psiprev <- psi } @@ -22,31 +26,34 @@ phi <- function(j, x, alpha, beta) { } newmu <- function(oldmu, i, alpha, beta) { - if(all(phi(i,oldmu, alpha,beta) > 0)) return(oldmu) + if (all(phi(i, oldmu, alpha, beta) > 0)) { + return(oldmu) + } u <- 1e-3 y <- 0 z <- oldmu # binary search x <- oldmu - while(z - y > u*y) { - x <- (y+z)/2 - if(all(phi(i,x,alpha,beta) > 0)) - y <- x - else - z <- x + while (z - y > u * y) { + x <- (y + z) / 2 + if (all(phi(i, x, alpha, beta) > 0)) { + y <- x + } else { + z <- x + } } return(x) } newmus <- function(oldmus, i, alpha, beta) { res <- NULL - for(j in seq_along(oldmus)) { - res <- c(res, newmu(oldmus[j],i,alpha[,j],beta[,j])) + for (j in seq_along(oldmus)) { + res <- c(res, newmu(oldmus[j], i, alpha[, j], beta[, j])) } res } -#stop('debug') +# stop('debug') # conjugate gradient solver Ax = b, b may be matrix # If A is a function, it must be able to take a matrix argument # Algorithm 3 from Kaasschieter (1988) @@ -57,39 +64,39 @@ newmus <- function(oldmus, i, alpha, beta) { #' Solve a symmetric linear system with the conjugate gradient method -#' -#' 
-#' \code{cgsolve} uses a conjugate gradient algorithm to solve the linear -#' system \eqn{A x = b} where \eqn{A} is a symmetric matrix. \code{cgsolve} is -#' used internally in \pkg{lfe} in the routines \code{\link{fevcov}} and -#' \code{\link{bccorr}}, but has been made public because it might be useful +#' +#' +#' `cgsolve` uses a conjugate gradient algorithm to solve the linear +#' system \eqn{A x = b} where \eqn{A} is a symmetric matrix. `cgsolve` is +#' used internally in \pkg{lfe} in the routines [fevcov()] and +#' [bccorr()], but has been made public because it might be useful #' for other purposes as well. -#' -#' -#' The argument \code{A} can be a symmetric matrix or a symmetric sparse matrix -#' inheriting from \code{"Matrix"} of the package \pkg{Matrix}. It can also be +#' +#' +#' The argument `A` can be a symmetric matrix or a symmetric sparse matrix +#' inheriting from `"Matrix"` of the package \pkg{Matrix}. It can also be #' a function which performs the matrix-vector product. If so, the function #' must be able to take as input a matrix of column vectors. -#' -#' If the matrix \code{A} is rank deficient, some solution is returned. If +#' +#' If the matrix `A` is rank deficient, some solution is returned. If #' there is no solution, a vector is returned which may or may not be close to -#' a solution. If \code{symmtest} is \code{FALSE}, no check is performed that -#' \code{A} is symmetric. If not symmetric, \code{cgsolve} is likely to raise +#' a solution. If `symmtest` is `FALSE`, no check is performed that +#' `A` is symmetric. If not symmetric, `cgsolve` is likely to raise #' an error about divergence. -#' -#' The tolerance \code{eps} is a relative tolerance, i.e. \eqn{||x - x_0|| < +#' +#' The tolerance `eps` is a relative tolerance, i.e. \eqn{||x - x_0|| < #' \epsilon ||x_0||} where \eqn{x_0} is the true solution and \eqn{x} is the -#' solution returned by \code{cgsolve}. Use a negative \code{eps} for absolute -#' tolerance. 
The termination criterion for \code{cgsolve} is the one from +#' solution returned by `cgsolve`. Use a negative `eps` for absolute +#' tolerance. The termination criterion for `cgsolve` is the one from #' \cite{Kaasschieter (1988)}, Algorithm 3. -#' +#' #' Preconditioning is currently not supported. -#' -#' If \code{A} is a function, the test for symmetry is performed by drawing two -#' random vectors \code{x,y}, and testing whether \eqn{|(Ax, y) - (x, Ay)| < +#' +#' If `A` is a function, the test for symmetry is performed by drawing two +#' random vectors `x,y`, and testing whether \eqn{|(Ax, y) - (x, Ay)| < #' 10^{-6} sqrt((||Ax||^2 + ||Ay||^2)/N)}, where \eqn{N} is the vector length. #' Thus, the test is neither deterministic nor perfect. -#' +#' #' @param A matrix, Matrix or function. #' @param b vector or matrix of columns vectors. #' @param eps numeric. Tolerance. @@ -97,48 +104,49 @@ newmus <- function(oldmus, i, alpha, beta) { #' @param symmtest logical. Should the matrix be tested for symmetry? #' @param name character. Arbitrary name used in progress reports. #' @return -#' +#' #' A solution \eqn{x} of the linear system \eqn{A x = b} is returned. -#' @seealso \code{\link{kaczmarz}} +#' @seealso [kaczmarz()] #' @references Kaasschieter, E. (1988) \cite{A practical termination criterion #' for the conjugate gradient method}, BIT Numerical Mathematics, -#' 28(2):308-322. \url{http://link.springer.com/article/10.1007\%2FBF01934094} +#' 28(2):308-322. 
#' @examples -#' -#' N <- 100000 +#' +#' N <- 100000 #' # create some factors -#' f1 <- factor(sample(34000,N,replace=TRUE)) -#' f2 <- factor(sample(25000,N,replace=TRUE)) +#' f1 <- factor(sample(34000, N, replace = TRUE)) +#' f2 <- factor(sample(25000, N, replace = TRUE)) #' # a matrix of dummies, which probably is rank deficient -#' B <- makeDmatrix(list(f1,f2)) -#' dim(B) +#' B <- makeDmatrix(list(f1, f2)) +#' dim(B) #' # create a right hand side -#' b <- as.matrix(B %*% rnorm(ncol(B))) +#' b <- as.matrix(B %*% rnorm(ncol(B))) #' # solve B' B x = B' b -#' sol <- cgsolve(crossprod(B), crossprod(B, b), eps=-1e-2) -#' #verify solution -#' sqrt(sum((B %*% sol - b)^2)) -#' +#' sol <- cgsolve(crossprod(B), crossprod(B, b), eps = -1e-2) +#' # verify solution +#' sqrt(sum((B %*% sol - b)^2)) +#' #' @export cgsolve -cgsolve <- function(A, b, eps=1e-3,init=NULL, symmtest=FALSE, name='') { +cgsolve <- function(A, b, eps = 1e-3, init = NULL, symmtest = FALSE, name = "") { start <- Sys.time() - precon <- attr(A,'precon') + precon <- attr(A, "precon") oneini <- 0 - if(is.matrix(b) || inherits(b,'Matrix')) N <- nrow(b) else N <- length(b) - if(is.matrix(A) || inherits(A,'Matrix') ) { - if(symmtest && !isSymmetric(A)) stop("matrix is not symmetric") - fun <- function(v) as.matrix(A %*% v) - } else if(is.function(A)) { + if (is.matrix(b) || inherits(b, "Matrix")) N <- nrow(b) else N <- length(b) + if (is.matrix(A) || inherits(A, "Matrix")) { + if (symmtest && !isSymmetric(A)) stop("matrix is not symmetric") + fun <- function(v) as.matrix(A %*% v) + } else if (is.function(A)) { fun <- function(v) as.matrix(A(v)) - if(symmtest) { + if (symmtest) { # A symmetric matrix satisfies (Ax,y) = (Ay,x) for every y and x - x <- as.matrix(runif(N,0.7,1.4)) - y <- as.matrix(runif(N,0.7,1.4)) + x <- as.matrix(runif(N, 0.7, 1.4)) + y <- as.matrix(runif(N, 0.7, 1.4)) Ax <- fun(x) Ay <- fun(y) - if(abs(sum(Ax * y) - sum(Ay * x)) > 1e-6*sqrt(mean(Ax^2) + mean(Ay^2))) - 
warning(deparse(substitute(A)), ' seems to be non-symmetric') - rm(x,y,Ax,Ay) + if (abs(sum(Ax * y) - sum(Ay * x)) > 1e-6 * sqrt(mean(Ax^2) + mean(Ay^2))) { + warning(deparse(substitute(A)), " seems to be non-symmetric") + } + rm(x, y, Ax, Ay) } } else { stop("A must be a matrix, Matrix or a function") @@ -147,106 +155,111 @@ cgsolve <- function(A, b, eps=1e-3,init=NULL, symmtest=FALSE, name='') { b <- as.matrix(b) start <- last <- Sys.time() -# if(is.null(init)) init <- matrix(rnorm(nrow(b)*ncol(b)),nrow(b)) - if(is.null(init)) { - x <- matrix(0,N,ncol(b)) + # if(is.null(init)) init <- matrix(rnorm(nrow(b)*ncol(b)),nrow(b)) + if (is.null(init)) { + x <- matrix(0, N, ncol(b)) r <- b } else { x <- as.matrix(init) - if(ncol(x) == 1 && ncol(b) > 1) - x <- matrix(init, N, ncol(b)) + if (ncol(x) == 1 && ncol(b) > 1) { + x <- matrix(init, N, ncol(b)) + } r <- b - fun(x) } p <- r tim <- 0 -# first iteration outside loop + # first iteration outside loop oldr2 <- colSums(r^2) minr2 <- sqrt(max(oldr2)) Ap <- fun(p) - alpha <- oldr2/colSums(p * Ap) - mu <- 1/alpha - allalpha <- matrix(alpha,1) + alpha <- oldr2 / colSums(p * Ap) + mu <- 1 / alpha + allalpha <- matrix(alpha, 1) - x <- x + t(alpha*t(p)) - r <- r - t(alpha*t(Ap)) + x <- x + t(alpha * t(p)) + r <- r - t(alpha * t(Ap)) r2 <- colSums(r^2) - - res <- matrix(0,nrow(x),ncol(x)) + + res <- matrix(0, nrow(x), ncol(x)) origcol <- 1:ncol(b) k <- 0 kk <- 0 allbeta <- NULL - if(length(eps) == 1) eps <- rep(eps,ncol(b)) + if (length(eps) == 1) eps <- rep(eps, ncol(b)) negeps <- eps < 0 eps <- abs(eps) - eps <- ifelse(negeps, eps, eps/(1+eps)) - pint <- getOption('lfe.pint') - while(TRUE) { - - k <- k+1 - delta <- mu * ifelse(negeps,1,sqrt(colSums(x^2))) + eps <- ifelse(negeps, eps, eps / (1 + eps)) + pint <- getOption("lfe.pint") + while (TRUE) { + k <- k + 1 + delta <- mu * ifelse(negeps, 1, sqrt(colSums(x^2))) now <- Sys.time() - dt <- as.numeric(now-last, units='secs') - if(dt > pint) { + dt <- as.numeric(now - last, units 
= "secs") + if (dt > pint) { last <- now - message(date(), ' CG iter ',k,' ',name,' target=',signif(min(eps),3), - ' delta=',signif(max(sqrt(r2)/delta),3), ' vecs=',length(delta), ' mu ',signif(min(mu),3)) + message( + date(), " CG iter ", k, " ", name, " target=", signif(min(eps), 3), + " delta=", signif(max(sqrt(r2) / delta), 3), " vecs=", length(delta), " mu ", signif(min(mu), 3) + ) } done <- sqrt(r2) < eps * delta | (k >= N) - if(any(done)) { -# message('finished vec ',ilpretty(origcol[done])) + if (any(done)) { + # message('finished vec ',ilpretty(origcol[done])) # remove the finished vectors - res[,origcol[done]] <- x[,done,drop=FALSE] + res[, origcol[done]] <- x[, done, drop = FALSE] origcol <- origcol[!done] eps <- eps[!done] negeps <- negeps[!done] - x <- x[,!done, drop=FALSE] - if(length(origcol) == 0) break; - p <- p[,!done, drop=FALSE] - r <- r[,!done, drop=FALSE] + x <- x[, !done, drop = FALSE] + if (length(origcol) == 0) break + p <- p[, !done, drop = FALSE] + r <- r[, !done, drop = FALSE] r2 <- r2[!done] oldr2 <- oldr2[!done] mu <- mu[!done] - allalpha <- allalpha[,!done, drop=FALSE] - allbeta <- allbeta[,!done, drop=FALSE] + allalpha <- allalpha[, !done, drop = FALSE] + allbeta <- allbeta[, !done, drop = FALSE] kk <- 0 } r2rms <- sqrt(max(r2)) - minr2 <- min(minr2,r2rms) - if(( kk > 1000 && r2rms > 100*minr2) || (k > 100 && r2rms > 10000*minr2)) { - warning('cgsolve (',name,') seems to diverge, iter=',k,', ||r2||=',r2rms, - ' returning imprecise solutions') - res[,origcol[seq_len(ncol(x))]] <- x + minr2 <- min(minr2, r2rms) + if ((kk > 1000 && r2rms > 100 * minr2) || (k > 100 && r2rms > 10000 * minr2)) { + warning( + "cgsolve (", name, ") seems to diverge, iter=", k, ", ||r2||=", r2rms, + " returning imprecise solutions" + ) + res[, origcol[seq_len(ncol(x))]] <- x return(res) } kk <- kk + 1 - beta <- r2/oldr2 + beta <- r2 / oldr2 allbeta <- rbind(allbeta, beta) -# we have created some C-functions to handle some operations. 
-# Solely to avoid copying large matrices. - p <- .Call(C_pdaxpy,r,p,beta) -# p <- r + t(beta*t(p)) + # we have created some C-functions to handle some operations. + # Solely to avoid copying large matrices. + p <- .Call(C_pdaxpy, r, p, beta) + # p <- r + t(beta*t(p)) Ap <- fun(p) -# gc() - alpha <- r2/.Call(C_piproduct, Ap, p) -# alpha <- r2/colSums(Ap*p) -# message(name, ' ', k, ' alpha '); print(alpha) + # gc() + alpha <- r2 / .Call(C_piproduct, Ap, p) + # alpha <- r2/colSums(Ap*p) + # message(name, ' ', k, ' alpha '); print(alpha) allalpha <- rbind(allalpha, alpha) x <- .Call(C_pdaxpy, x, p, alpha) r <- .Call(C_pdaxpy, r, Ap, -alpha) oldr2 <- r2 r2 <- colSums(r^2) - mu <- newmus(mu, k+1, allalpha, allbeta) + mu <- newmus(mu, k + 1, allalpha, allbeta) } -# message('CG iters:',k) + # message('CG iters:',k) now <- Sys.time() - dt <- as.numeric(now-start, units='secs') - if(dt > pint) - message(' *** cgsolve(',name,') finished with ',k,' iters in ',as.integer(dt),' seconds') + dt <- as.numeric(now - start, units = "secs") + if (dt > pint) { + message(" *** cgsolve(", name, ") finished with ", k, " iters in ", as.integer(dt), " seconds") + } res } diff --git a/R/chmethod.R b/R/chmethod.R index dbf1f58..78615b2 100644 --- a/R/chmethod.R +++ b/R/chmethod.R @@ -1,50 +1,50 @@ # $Id: chmethod.R 1693 2015-04-07 09:36:29Z sgaure $ -findfe <- function(dd,Rhs,se=FALSE) { +findfe <- function(dd, Rhs, se = FALSE) { # find references - refnames <- attr(dd,'refnames') - nm <- c(colnames(dd),refnames) - refcnt <- attr(dd,'refcnt') + refnames <- attr(dd, "refnames") + nm <- c(colnames(dd), refnames) + refcnt <- attr(dd, "refcnt") # add in the reference in dg - dg <- c(diag(dd),refcnt) + dg <- c(diag(dd), refcnt) ok <- 1:ncol(dd) - if(se) sev <- double(length(nm)) + if (se) sev <- double(length(nm)) alphacoef <- double(length(nm)) # the super-nodal algorithm # is default and far better, but it consumes more memory - trysolve <- try(solve(dd,Rhs)) - 
if(inherits(trysolve,'try-error')) { - if(grepl('problem too large',geterrmessage())) { - message(paste('Never mind, trying *slower* non-supernodal algorithm, nnz=',nnzero(dd))) - message(paste(date(),'This may be an opportunity for a nice cup of tea. Or two.')) + trysolve <- try(solve(dd, Rhs)) + if (inherits(trysolve, "try-error")) { + if (grepl("problem too large", geterrmessage())) { + message(paste("Never mind, trying *slower* non-supernodal algorithm, nnz=", nnzero(dd))) + message(paste(date(), "This may be an opportunity for a nice cup of tea. Or two.")) gc() - ch <- Cholesky(dd,super=FALSE,perm=TRUE) - trysolve <- solve(ch,Rhs) + ch <- Cholesky(dd, super = FALSE, perm = TRUE) + trysolve <- solve(ch, Rhs) rm(ch) gc() } else { stop(geterrmessage()) } - } + } alphacoef[ok] <- as.vector(trysolve) - if(se) { + if (se) { # is there a faster way to find the diagonal of the inverse? - sev[ok] <- sqrt(diag(solve(dd)))*attr(se,'sefactor') - alpha <- data.frame(effect=alphacoef,se=sev,obs=dg) + sev[ok] <- sqrt(diag(solve(dd))) * attr(se, "sefactor") + alpha <- data.frame(effect = alphacoef, se = sev, obs = dg) } else { - alpha <- data.frame(effect=alphacoef,obs=dg) + alpha <- data.frame(effect = alphacoef, obs = dg) } rownames(alpha) <- nm alpha } -#makedummies <- function(factors) { +# makedummies <- function(factors) { # nm <- c() # dummies <- Matrix(0,0,length(factors[[1]])) # for(i in 1:length(factors)) { @@ -54,115 +54,124 @@ findfe <- function(dd,Rhs,se=FALSE) { # } # rownames(dummies) <- nm # dummies -#} +# } makedd.full <- function(factors) { -# dm <- makedummies(factors) + # dm <- makedummies(factors) dm <- t(makeDmatrix(factors)) nm <- rownames(dm) dd <- tcrossprod(dm) rownames(dd) <- colnames(dd) <- nm - attr(dd,'dummies') <- dm - attr(dd,'nm') <- nm + attr(dd, "dummies") <- dm + attr(dd, "nm") <- nm dd } makeddlist <- function(factors) { - if(length(factors) > 2) { - if(is.null(attr(factors,'references'))) { + if (length(factors) > 2) { + if 
(is.null(attr(factors, "references"))) { # find references by fiddling with Cholesky - message('*** More than two groups, finding refs by Cholesky pivots, interpret at own risk') + message("*** More than two groups, finding refs by Cholesky pivots, interpret at own risk") # first the full matrix, find small pivots dd <- makedd.full(factors) - orignm <- attr(dd,'nm') + orignm <- attr(dd, "nm") # add small amount to diagonal eps <- sqrt(.Machine$double.eps) - Ch <- try(Cholesky(dd,super=TRUE,perm=TRUE,Imult=eps)) - if(inherits(Ch,'try-error') && grepl('problem too large',geterrmessage())) { - Ch <- Cholesky(dd,super=FALSE,perm=TRUE,Imult=eps) + Ch <- try(Cholesky(dd, super = TRUE, perm = TRUE, Imult = eps)) + if (inherits(Ch, "try-error") && grepl("problem too large", geterrmessage())) { + Ch <- Cholesky(dd, super = FALSE, perm = TRUE, Imult = eps) } # strangely enough, coercing to sparseMatrix doesn't take care of # the permutation, we apply it manually. Let's hope it's never fixed. - rm(dd); gc() + rm(dd) + gc() pivot <- Ch@perm - ch <- as(Ch,'sparseMatrix') - rm(Ch); gc() + ch <- as(Ch, "sparseMatrix") + rm(Ch) + gc() dg <- diag(ch)[order(pivot)]**2 - rm(ch); gc() - refs <- (dg < eps**(1/3)) + rm(ch) + gc() + refs <- (dg < eps**(1 / 3)) refnames <- orignm[refs] - message(paste('***',length(refnames),'references found')) + message(paste("***", length(refnames), "references found")) } else { - refnames <- attr(factors,'references') - orignm <- unlist(lapply(names(factors), - function(n) paste(n,levels(factors[[n]]),sep='.'))) + refnames <- attr(factors, "references") + orignm <- unlist(lapply( + names(factors), + function(n) paste(n, levels(factors[[n]]), sep = ".") + )) } # there may be references in more than one factor # remove all of them # create factor list with named levels - nf <- lapply(names(factors),function(n) { + nf <- lapply(names(factors), function(n) { f <- factors[[n]] - levels(f) <- paste(n,levels(f),sep='.') + levels(f) <- paste(n, levels(f), sep 
= ".") f }) # remove reference levels, and remove the prefix # find the levels - lev <- lapply(nf,function(f) which(levels(f) %in% refnames)) - nnf <- mapply(function(f,l) factor(f,exclude=levels(f)[l]),factors,lev,SIMPLIFY=FALSE) + lev <- lapply(nf, function(f) which(levels(f) %in% refnames)) + nnf <- mapply(function(f, l) factor(f, exclude = levels(f)[l]), factors, lev, SIMPLIFY = FALSE) dd <- makedd.full(nnf) - attr(dd,'keep') <- 1:length(nnf[[1]]) - attr(dd,'refnames') <- refnames -# attr(dd,'refcnt') <- rep(1,length(refnames)) -# find the number of occurences - cntlst <- unlist(lapply(refnames,function(n) lapply(nf,function(f) sum(f == n)))) - attr(dd,'refcnt') <- cntlst[cntlst > 0] - attr(dd,'comp') <- 1 + attr(dd, "keep") <- 1:length(nnf[[1]]) + attr(dd, "refnames") <- refnames + # attr(dd,'refcnt') <- rep(1,length(refnames)) + # find the number of occurences + cntlst <- unlist(lapply(refnames, function(n) lapply(nf, function(f) sum(f == n)))) + attr(dd, "refcnt") <- cntlst[cntlst > 0] + attr(dd, "comp") <- 1 res <- list(dd) - attr(res,'nm') <- orignm + attr(res, "nm") <- orignm } else { # 2 or fewer factors, find references by component cf <- compfactor(factors) - nml <- lapply(factors,function(f) levels(f)) - nm <- unlist(lapply(names(nml),function(n) paste(n,nml[[n]],sep='.'))) + nml <- lapply(factors, function(f) levels(f)) + nm <- unlist(lapply(names(nml), function(n) paste(n, nml[[n]], sep = "."))) res <- list() li <- 1 -# this loop suffers from too much copying and stuff -# when there are many components (e.g. like 10000) + # this loop suffers from too much copying and stuff + # when there are many components (e.g. 
like 10000) remfact <- factors fullidx <- 1:length(factors[[1]]) - for(l in levels(cf)) { + for (l in levels(cf)) { # find those in this level keep <- which(cf == l) -# cat(date(),'comp fact',li,'size',length(keep),'\n') - fcomp <- lapply(remfact,function(f) factor(f[keep])) - remfact <- lapply(remfact,function(f) factor(f[-keep])) + # cat(date(),'comp fact',li,'size',length(keep),'\n') + fcomp <- lapply(remfact, function(f) factor(f[keep])) + remfact <- lapply(remfact, function(f) factor(f[-keep])) cf <- factor(cf[-keep]) # then the reference level - maxrefs <- lapply(fcomp,function(f) {tf <- table(f); m <- which.max(tf); tf[m]}) + maxrefs <- lapply(fcomp, function(f) { + tf <- table(f) + m <- which.max(tf) + tf[m] + }) # in which factor rfac <- which.max(unlist(maxrefs)) # which level reflevel <- names(maxrefs[[rfac]]) # drop that level from the factor - fcomp[[rfac]] <- factor(fcomp[[rfac]],exclude=reflevel) - refname <- paste(names(remfact)[[rfac]],reflevel,sep='.') + fcomp[[rfac]] <- factor(fcomp[[rfac]], exclude = reflevel) + refname <- paste(names(remfact)[[rfac]], reflevel, sep = ".") # remove those without levels - len <- unlist(lapply(fcomp,nlevels)) + len <- unlist(lapply(fcomp, nlevels)) fcomp <- fcomp[len > 0] dd <- makedd.full(fcomp) # the keep attribute should be relative to # the full factor, not to remfact - attr(dd,'keep') <- fullidx[keep] + attr(dd, "keep") <- fullidx[keep] fullidx <- fullidx[-keep] - attr(dd,'refnames') <- refname - attr(dd,'refcnt') <- max(unlist(maxrefs)) - attr(dd,'comp') <- li + attr(dd, "refnames") <- refname + attr(dd, "refcnt") <- max(unlist(maxrefs)) + attr(dd, "comp") <- li res[[li]] <- dd - li <- li+1 -# res <- c(res, list(dd)) + li <- li + 1 + # res <- c(res, list(dd)) } - attr(res,'nm') <- nm + attr(res, "nm") <- nm } res } diff --git a/R/compfactor.R b/R/compfactor.R index b552eb4..8f22619 100644 --- a/R/compfactor.R +++ b/R/compfactor.R @@ -6,14 +6,14 @@ #' Find the connected components -#' +#' #' 'compfactor' 
computes the connected components of the dummy-part of the #' model. -#' -#' If there are more than two factors and \code{WW=FALSE}, only the first two +#' +#' If there are more than two factors and `WW=FALSE`, only the first two #' will be used. -#' -#' If \code{WW=TRUE} and \code{length(fl) > 2}, the component structure will be +#' +#' If `WW=TRUE` and `length(fl) > 2`, the component structure will be #' as in "A Note on the Determination of Connectedness in an N-Way Cross #' Classification" by D.L. Weeks and D.R. Williams, Technometrics, vol 6 no 3, #' August 1964. I.e. in each component, the coefficients within each factor are @@ -22,36 +22,40 @@ #' component, and interpret the coefficients within a component as usual. This #' is not an exhaustion of all the estimable functions. There is somewhat more #' about this in one of the vignettes. -#' +#' #' @param fl a list of factors defining the dummies #' @param WW logical. Use Weeks and Williams components #' @return A factor of the same length as the factors in the input argument. -#' It defines the connected components. E.g. \code{nlevels(compfactor(fl))} +#' It defines the connected components. E.g. `nlevels(compfactor(fl))` #' will yield the number of connected components. 
#' @examples -#' +#' #' ## create two factors -#' f1 <- factor(sample(300,400,replace=TRUE)) -#' f2 <- factor(sample(300,400,replace=TRUE)) -#' +#' f1 <- factor(sample(300, 400, replace = TRUE)) +#' f2 <- factor(sample(300, 400, replace = TRUE)) +#' #' ## find the components -#' cf <- compfactor(list(f1=f1,f2=f2)) -#' +#' cf <- compfactor(list(f1 = f1, f2 = f2)) +#' #' ## show the third largest component -#' fr <- data.frame(f1,f2,cf) -#' fr[cf==3,] -#' +#' fr <- data.frame(f1, f2, cf) +#' fr[cf == 3, ] +#' #' @export compfactor -compfactor <- function(fl, WW=FALSE) { - if(length(fl) == 0) return(factor(NULL)) +compfactor <- function(fl, WW = FALSE) { + if (length(fl) == 0) { + return(factor(NULL)) + } N <- length(fl[[1]]) - purefls <- sapply(fl,function(f) is.null(attr(f,'x',exact=TRUE))) + purefls <- sapply(fl, function(f) is.null(attr(f, "x", exact = TRUE))) fl <- fl[purefls] - if(length(fl) <= 1) return(factor(rep(1,N))) - if(WW && length(fl) > 2) { - cf <- factor(.Call(C_wwcomp,fl)) + if (length(fl) <= 1) { + return(factor(rep(1, N))) + } + if (WW && length(fl) > 2) { + cf <- factor(.Call(C_wwcomp, fl)) } else { - cf <- factor(.Call(C_conncomp,fl[1:2])) + cf <- factor(.Call(C_conncomp, fl[1:2])) } cf } diff --git a/R/condfstat.R b/R/condfstat.R index a124de8..78a86b1 100644 --- a/R/condfstat.R +++ b/R/condfstat.R @@ -1,87 +1,98 @@ # $Id: condfstat.R 1943 2016-04-07 23:08:38Z sgaure $ -ivbootstrap <- function(z, x, y, quantiles=0.95, N=100L, cluster=NULL) { +ivbootstrap <- function(z, x, y, quantiles = 0.95, N = 100L, cluster = NULL) { # estimate bias as E((z' x)^{-1} z' eps) N <- max(N) - if(!is.null(cluster)) { - if(length(cluster) > 1) - warning('Only a single cluster is supported for IV bootstrap, using ', - names(cluster)[[1]]) + if (!is.null(cluster)) { + if (length(cluster) > 1) { + warning( + "Only a single cluster is supported for IV bootstrap, using ", + names(cluster)[[1]] + ) + } clu <- cluster[[1]] iclu <- as.integer(clu) } -# zortho <- 
orthonormalize(z) + # zortho <- orthonormalize(z) - pint <- getOption('lfe.pint') + pint <- getOption("lfe.pint") start <- last <- Sys.time() # hmm, can we reduce the number of instruments? n <- 0 - bias <- replicate(N,{ - n <<- n + 1 + bias <- replicate(N, { + n_temp <- n + 1 + assign("n", n_temp, inherits = TRUE) ## replaces n <<- n+1 now <- Sys.time() - if(is.numeric(pint) && pint > 0 && now-last > pint) { - message(date(), ' Iteration ', n , ' of ', N , ' in IV bootstrap') - last <<- now + if (is.numeric(pint) && pint > 0 && now - last > pint) { + message(date(), " Iteration ", n, " of ", N, " in IV bootstrap") + assign("last", now, inherits = TRUE) # replaces: last <<- now } - if(is.null(cluster)) { + if (is.null(cluster)) { # resampling observations for indep residuals - s <- sample(nrow(z),replace=TRUE) + s <- sample(nrow(z), replace = TRUE) } else { # resample entire levels - cl <- sort(sample(nlevels(clu), replace=TRUE)) + cl <- sort(sample(nlevels(clu), replace = TRUE)) # find a faster way to do this: # s <- sort(unlist(sapply(cl, function(ll) which(clu==ll)))) s <- NULL - while(length(cl) > 0) { - s <- c(s,which(iclu %in% cl)) - cl <- cl[c(1L,diff(cl)) == 0] + while (length(cl) > 0) { + s <- c(s, which(iclu %in% cl)) + cl <- cl[c(1L, diff(cl)) == 0] } } - + # draw new instruments - zortho <- orthonormalize(z[s,,drop=FALSE]) + zortho <- orthonormalize(z[s, , drop = FALSE]) # draw new X, and project it - zX <- crossprod(zortho, x[s,,drop=FALSE]) - tryCatch(solve(crossprod(zX), - crossprod(zX, crossprod(zortho, y[s,,drop=FALSE]))), - error=function(e) {warning(e);NULL}) + zX <- crossprod(zortho, x[s, , drop = FALSE]) + tryCatch( + solve( + crossprod(zX), + crossprod(zX, crossprod(zortho, y[s, , drop = FALSE])) + ), + error = function(e) { + warning(e) + NULL + } + ) }) - if(start != last) cat('\n') + if (start != last) cat("\n") - if(is.list(bias)) { + if (is.list(bias)) { # some of them returned NULL, so replicate returned a list # discard those NULLs - 
bias <- simplify2array(bias[!sapply(bias,is.null)]) + bias <- simplify2array(bias[!sapply(bias, is.null)]) N <- dim(bias)[3] } # estimate quantiles of the bias - if(is.null(quantiles)) { + if (is.null(quantiles)) { res <- bias - res <- aperm(res, c(1,(3:length(dim(res))),2)) + res <- aperm(res, c(1, (3:length(dim(res))), 2)) } else { - qname <- paste(100*round(quantiles,3), '%',sep='') - dm <- 1:(length(dim(bias))-1) - res <- apply(bias,dm,function(s) quantile(s,probs=quantiles,na.rm=TRUE,type=4)) - if(length(quantiles) == 1) { + qname <- paste(100 * round(quantiles, 3), "%", sep = "") + dm <- 1:(length(dim(bias)) - 1) + res <- apply(bias, dm, function(s) quantile(s, probs = quantiles, na.rm = TRUE, type = 4)) + if (length(quantiles) == 1) { dmn <- dimnames(res) - dim(res) <- c(1,dim(res)) - dimnames(res) <- c(list(paste(round(quantiles,3),'%',sep='')),dmn) + dim(res) <- c(1, dim(res)) + dimnames(res) <- c(list(paste(round(quantiles, 3), "%", sep = "")), dmn) } - res <- aperm(res,c(2,1,3:length(dim(res)))) + res <- aperm(res, c(2, 1, 3:length(dim(res)))) } - structure(res, q=quantiles, samples=N) + structure(res, q = quantiles, samples = N) } - # From "A weak instrument F-test in linear iv models with multiple - # " endogenous variables", Sanderson & Windmeijer, Disc. Paper 14/644 U of Bristol, 2014 - # pp 22-23. There's an error in how tilde delta is computed at p. 23. - # it should be \hat X_{-j}, not X_{-j}. I.e. coefs for \hat X_{-j} when regressing - # x_j. But we should predict with with X_{-j} - # I.e. estimate delta in x_j = \hat X_{-j} * delta + eps - # Regress x_j - X_{-j}*delta = Z * kappa + xi - # wald test on kappa, with df = max(1,kz-#endog+1) +# From "A weak instrument F-test in linear iv models with multiple +# " endogenous variables", Sanderson & Windmeijer, Disc. Paper 14/644 U of Bristol, 2014 +# pp 22-23. There's an error in how tilde delta is computed at p. 23. +# it should be \hat X_{-j}, not X_{-j}. I.e. 
coefs for \hat X_{-j} when regressing +# x_j. But we should predict with with X_{-j} +# I.e. estimate delta in x_j = \hat X_{-j} * delta + eps +# Regress x_j - X_{-j}*delta = Z * kappa + xi +# wald test on kappa, with df = max(1,kz-#endog+1) @@ -90,13 +101,13 @@ ivbootstrap <- function(z, x, y, quantiles=0.95, N=100L, cluster=NULL) { #' Compute conditional F statistic for weak instruments in an IV-estimation #' with multiple endogenous variables -#' +#' #' When using multiple instruments for multiple endogenous variables, the #' ordinary individual t-tests for the instruments in the first stage do not #' always reveal a weak set of instruments. Conditional F statistics can be #' used for such testing. -#' -#' +#' +#' #' IV coefficient estimates are not normally distributed, in particular they do #' not have the right expectation. They follow a quite complicated #' distribution which is fairly close to normal if the instruments are good. @@ -106,131 +117,134 @@ ivbootstrap <- function(z, x, y, quantiles=0.95, N=100L, cluster=NULL) { #' small relative to the bias in OLS. See \cite{Sanderson and Windmeijer #' (2014)} and \cite{Stock and Yogo (2004)}. If F is small, the bias can be #' large compared to the standard error. -#' -#' If \code{any(quantiles > 0.0)}, a bootstrap with \code{bN} samples will be +#' +#' If `any(quantiles > 0.0)`, a bootstrap with `bN` samples will be #' performed to estimate quantiles of the endogenous parameters which includes #' the variance both from the 1st and 2nd stage. The result is returned in an -#' array attribute \code{quantiles} of the value returned by \code{condfstat}. -#' The argument \code{quantiles} can be a vector to estimate more than one -#' quantile at once. If \code{quantiles=NULL}, the bootstrapped estimates +#' array attribute `quantiles` of the value returned by `condfstat`. +#' The argument `quantiles` can be a vector to estimate more than one +#' quantile at once. 
If `quantiles=NULL`, the bootstrapped estimates #' themselves are returned. The bootstrap is normally much faster than running -#' \code{felm} over and over again. This is so because all exogenous variables +#' `felm` over and over again. This is so because all exogenous variables #' are projected out of the equations before doing the bootstrap. -#' -#' @param object object of class \code{"felm"}, a result of a call to -#' \code{\link{felm}}. -#' @param type character. Error structure. Passed to \code{\link{waldtest}}. If -#' \code{NULL}, both iid and robust Fs are returned. +#' +#' @param object object of class `"felm"`, a result of a call to +#' [felm()]. +#' @param type character. Error structure. Passed to [waldtest()]. If +#' `NULL`, both iid and robust Fs are returned. #' @param quantiles numeric. Quantiles for bootstrap. #' @param bN integer. Number of bootstrap samples. #' @return A p x k matrix, where k is the number of endogenous variables. Each #' row are the conditional F statistics on a residual equation as described in #' \cite{Sanderson and Windmeijer (2014)}, for a certain error structure. The #' default is to use iid, or cluster if a cluster was specified to -#' \code{\link{felm}}. The third choice is \code{'robust'}, for heteroskedastic -#' errors. If \code{type=NULL}, iid and robust Fs are returned, and cluster, if -#' that was specified to \code{felm}. -#' +#' [felm()]. The third choice is `'robust'`, for heteroskedastic +#' errors. If `type=NULL`, iid and robust Fs are returned, and cluster, if +#' that was specified to `felm`. +#' #' Note that for these F statistics it is not the p-value that matters, it is #' the F statistic itself which (coincidentally) pops up in the denominator for #' the asymptotic bias of the IV estimates, and thus a large F is beneficial. -#' @note Please note that \code{condfstat} does not work with the old syntax -#' for IV in \code{\link{felm}(...,iv=)}. 
The new multipart syntax must be +#' @note Please note that `condfstat` does not work with the old syntax +#' for IV in `[felm](...,iv=)`. The new multipart syntax must be #' used. #' @references Sanderson, E. and F. Windmeijer (2014) \cite{A weak instrument #' F-test in linear IV models with multiple endogenous variables}, Journal of #' Econometrics, 2015. -#' \url{http://www.sciencedirect.com/science/article/pii/S0304407615001736} -#' +#' +#' #' Stock, J.H. and M. Yogo (2004) \cite{Testing for weak instruments in linear -#' IV regression}, \url{http://ssrn.com/abstract=1734933} in +#' IV regression}, in #' \cite{Identification and inference for econometric models: Essays in honor #' of Thomas Rothenberg}, 2005. #' @examples -#' +#' #' z1 <- rnorm(4000) #' z2 <- rnorm(length(z1)) #' u <- rnorm(length(z1)) #' # make x1, x2 correlated with errors u -#' -#' x1 <- z1 + z2 + 0.2*u + rnorm(length(z1)) -#' x2 <- z1 + 0.94*z2 - 0.3*u + rnorm(length(z1)) +#' +#' x1 <- z1 + z2 + 0.2 * u + rnorm(length(z1)) +#' x2 <- z1 + 0.94 * z2 - 0.3 * u + rnorm(length(z1)) #' y <- x1 + x2 + u #' est <- felm(y ~ 1 | 0 | (x1 | x2 ~ z1 + z2)) #' summary(est) #' \dontrun{ -#' summary(est$stage1, lhs='x1') -#' summary(est$stage1, lhs='x2') +#' summary(est$stage1, lhs = "x1") +#' summary(est$stage1, lhs = "x2") #' } -#' +#' #' # the joint significance of the instruments in both the first stages are ok: -#' t(sapply(est$stage1$lhs, function(lh) waldtest(est$stage1, ~z1|z2, lhs=lh))) -#' # everything above looks fine, t-tests for instruments, +#' t(sapply(est$stage1$lhs, function(lh) waldtest(est$stage1, ~ z1 | z2, lhs = lh))) +#' # everything above looks fine, t-tests for instruments, #' # as well as F-tests for excluded instruments in the 1st stages. 
#' # The conditional F-test reveals that the instruments are jointly weak #' # (it's close to being only one instrument, z1+z2, for both x1 and x2) -#' condfstat(est, quantiles=c(0.05, 0.95)) -#' +#' condfstat(est, quantiles = c(0.05, 0.95)) +#' #' @export condfstat -condfstat <- function(object, type='default', quantiles=0.0, bN=100L) { +condfstat <- function(object, type = "default", quantiles = 0.0, bN = 100L) { est <- object st1 <- est$stage1 - if(is.null(st1)) - stop('Conditional F statistic only makes sense for iv-estimation') + if (is.null(st1)) { + stop("Conditional F statistic only makes sense for iv-estimation") + } - if(is.null(type)) { - types <- c('iid','robust') - if(!is.null(est$clustervar)) types <- c(types,'cluster') + if (is.null(type)) { + types <- c("iid", "robust") + if (!is.null(est$clustervar)) types <- c(types, "cluster") } else { - if(identical(type,'default')) - types <- if(is.null(est$clustervar)) 'iid' else 'cluster' - else - types <- type + if (identical(type, "default")) { + types <- if (is.null(est$clustervar)) "iid" else "cluster" + } else { + types <- type + } } - if(length(st1$lhs) == 1) { + if (length(st1$lhs) == 1) { # only a single endogenous variable # reduce to ordinary F-test df1 <- nrow(st1$coefficients) result <- as.matrix(sapply(types, function(typ) { - waldtest(st1,st1$instruments, df1=df1, type=typ)['F'] + waldtest(st1, st1$instruments, df1 = df1, type = typ)["F"] })) - dimnames(result) <- list(st1$lhs,paste(types,'F')) - return(structure(t(result),df1=df1)) + dimnames(result) <- list(st1$lhs, paste(types, "F")) + return(structure(t(result), df1 = df1)) } # first, transform away all the exogenous variables from # instruments, endogenous variables and predicted endogenous variables # there may be an intercept in ivx, we remove it - keep <- !(colnames(st1$ivx) %in% '(Intercept)') - if(all(keep)) ivx <- st1$ivx else ivx <- st1$ivx[,keep, drop=FALSE] + keep <- !(colnames(st1$ivx) %in% "(Intercept)") + if (all(keep)) ivx <- 
st1$ivx else ivx <- st1$ivx[, keep, drop = FALSE] inames <- colnames(ivx) y <- cbind(st1$ivy, ivx, st1$c.fitted.values, est$c.response) fitnames <- makefitnames(colnames(st1$c.fitted.values)) setdimnames(y, list(NULL, c(colnames(st1$ivy), inames, fitnames, colnames(est$c.response)))) - mm <- list(y=y, x=st1$centred.exo) - tvars <- newols(mm, nostats=TRUE)$residuals - rm(y,mm) + mm <- list(y = y, x = st1$centred.exo) + tvars <- newols(mm, nostats = TRUE)$residuals + rm(y, mm) # should we estimate the relative bias? - if(any(quantiles > 0) || is.null(quantiles)) { + if (any(quantiles > 0) || is.null(quantiles)) { # use the predicted variables as instruments? - z <- tvars[,colnames(ivx),drop=FALSE] -# z <- xhat - x <- tvars[,colnames(st1$ivy),drop=FALSE] - y <- tvars[,colnames(est$c.response), drop=FALSE] - bias <- ivbootstrap(z,x,y, - quantiles=quantiles,N=bN,cluster=est$clustervar) - rm(z,x,y) + z <- tvars[, colnames(ivx), drop = FALSE] + # z <- xhat + x <- tvars[, colnames(st1$ivy), drop = FALSE] + y <- tvars[, colnames(est$c.response), drop = FALSE] + bias <- ivbootstrap(z, x, y, + quantiles = quantiles, N = bN, cluster = est$clustervar + ) + rm(z, x, y) # Now, bias contains the bias distribution # we subtract it from the estimate in object$coefficients quant <- bias -# cf <- object$coefficients[fitnames,,drop=FALSE] -# quant <- sapply(seq_len(ncol(cf)), function(i) cf[,i] + bias[,,i,drop=FALSE]) -# attributes(quant) <- attributes(bias) - + # cf <- object$coefficients[fitnames,,drop=FALSE] + # quant <- sapply(seq_len(ncol(cf)), function(i) cf[,i] + bias[,,i,drop=FALSE]) + # attributes(quant) <- attributes(bias) + quant <- drop(quant) } else { quant <- NULL @@ -241,9 +255,9 @@ condfstat <- function(object, type='default', quantiles=0.0, bN=100L) { evrest <- st1$lhs[!(st1$lhs %in% ev)] hatev <- makefitnames(ev) hatevrest <- makefitnames(evrest) - Xlhs <- tvars[,ev,drop=FALSE] - Xhatrest <- tvars[,hatevrest,drop=FALSE] - Xrest <- tvars[,evrest,drop=FALSE] + Xlhs <- 
tvars[, ev, drop = FALSE] + Xhatrest <- tvars[, hatevrest, drop = FALSE] + Xrest <- tvars[, evrest, drop = FALSE] Xlhs - Xrest %*% solve(crossprod(Xhatrest), t(Xhatrest) %*% Xlhs) }) @@ -251,21 +265,22 @@ condfstat <- function(object, type='default', quantiles=0.0, bN=100L) { # then regress on the instruments - mm <- list(y = resid, x=tvars[,inames], cluster=est$clustervar) - z <- newols(mm, nostats=FALSE) + mm <- list(y = resid, x = tvars[, inames], cluster = est$clustervar) + z <- newols(mm, nostats = FALSE) - df1 <- nrow(z$coefficients)-length(z$lhs)+1 + df1 <- nrow(z$coefficients) - length(z$lhs) + 1 result <- as.matrix(sapply(types, function(typ) { - sapply(z$lhs, function(lh) waldtest(z,inames,lhs=lh, df1=df1, type=typ)['F']) + sapply(z$lhs, function(lh) waldtest(z, inames, lhs = lh, df1 = df1, type = typ)["F"]) })) - dimnames(result) <- list(z$lhs,paste(types,'F')) - structure(t(result),df1=df1, quantiles=quant) + dimnames(result) <- list(z$lhs, paste(types, "F")) + structure(t(result), df1 = df1, quantiles = quant) } - -oldcondfstat <- function(object, type='default') { + +oldcondfstat <- function(object, type = "default") { est <- object - if(is.null(est$stage1)) - stop('Conditional F statistic only makes sense for iv-estimation') + if (is.null(est$stage1)) { + stop("Conditional F statistic only makes sense for iv-estimation") + } # for each endogenous variable x_j, we move it to the lhs, and estimate the # residuals with the other endogenous vars as explanatory variables. @@ -275,22 +290,22 @@ oldcondfstat <- function(object, type='default') { # a conditional test for each endogenous variable. 
st1 <- est$stage1 - if(length(st1$lhs) == 1) { + if (length(st1$lhs) == 1) { # only a single endogenous variable # reduce to ordinary F-test - W <- waldtest(st1,st1$instruments) - return(structure(W['F'],df1=W['df1'])) + W <- waldtest(st1, st1$instruments) + return(structure(W["F"], df1 = W["df1"])) } cl1 <- st1$call newcl <- cl <- est$call - Form <- as.Formula(cl[['formula']]) - baseform <- as.Formula(formula(Form,lhs=0,rhs=1:2)) - ivForm <- as.Formula(formula(Form,lhs=0,rhs=3)[[2]][[2]]) - endogForm <- formula(ivForm, lhs=NULL, rhs=0)[[2]] + Form <- as.Formula(cl[["formula"]]) + baseform <- as.Formula(formula(Form, lhs = 0, rhs = 1:2)) + ivForm <- as.Formula(formula(Form, lhs = 0, rhs = 3)[[2]][[2]]) + endogForm <- formula(ivForm, lhs = NULL, rhs = 0)[[2]] endogvars <- all.vars(endogForm) - instrvars <- st1$instruments #all.vars(formula(ivForm, lhs=0, rhs=NULL)[[2]]) + instrvars <- st1$instruments # all.vars(formula(ivForm, lhs=0, rhs=NULL)[[2]]) fitvars <- st1$endogvars @@ -300,7 +315,7 @@ oldcondfstat <- function(object, type='default') { # We create a new environment based on the old, but our new # residuals into the new environment - fitenv <- environment(est$st2call[['formula']]) + fitenv <- environment(est$st2call[["formula"]]) # an environment to store the new residual variables # we can get rid of the exogeneous residuals by @@ -317,15 +332,17 @@ oldcondfstat <- function(object, type='default') { # put endogenous variables, their predictions from 1st stage, and the # instruments on the lhs, with the exogenous variables on the rhs cvars <- c(endogvars, fitvars, instrvars) - cForm <- as.Formula(formula(paste(paste(cvars, collapse='|'),'~0'))) + cForm <- as.Formula(formula(paste(paste(cvars, collapse = "|"), "~0"))) - cForm <- as.Formula(update(cForm, as.formula(substitute(. ~ X, - list(X=baseform[[2]]))))) + cForm <- as.Formula(update(cForm, as.formula(substitute( + . 
~ X, + list(X = baseform[[2]]) + )))) projcall <- cl environment(cForm) <- fitenv - projcall[['formula']] <- cForm - projcall[['nostats']] <- TRUE - projest <- eval(projcall, envir=est$parent.frame) + projcall[["formula"]] <- cForm + projcall[["nostats"]] <- TRUE + projest <- eval(projcall, envir = est$parent.frame) # store residuals in environment under name 'x..(noexo).. # We store the residuals we need for the next regression @@ -334,26 +351,26 @@ oldcondfstat <- function(object, type='default') { # and the projected instruments. lhsvars <- colnames(st1$residuals) - endogfitvars <- paste(lhsvars,'(fit)',sep='') - for(ev in seq_along(lhsvars)) { + endogfitvars <- paste(lhsvars, "(fit)", sep = "") + for (ev in seq_along(lhsvars)) { # do an OLS to compute delta, but implement just with crossprod on the right matrix evnam <- lhsvars[ev] restfitnam <- endogfitvars[-ev] restnam <- lhsvars[-ev] - orig <- projest$residuals[,evnam,drop=FALSE] - Xjfit <- projest$residuals[,restfitnam,drop=FALSE] - Xj <- projest$residuals[,restnam,drop=FALSE] + orig <- projest$residuals[, evnam, drop = FALSE] + Xjfit <- projest$residuals[, restfitnam, drop = FALSE] + Xj <- projest$residuals[, restnam, drop = FALSE] delta <- solve(crossprod(Xjfit), t(Xjfit) %*% orig) resid <- orig - Xj %*% delta # these are the left hand sides in the stage 2 regression below - resnam <- paste(evnam,'..(noexo)..',sep='') - assign(resnam, resid, envir=noexo) + resnam <- paste(evnam, "..(noexo)..", sep = "") + assign(resnam, resid, envir = noexo) } # store the residual instruments in noexo also - for(instr in instrvars) { - resnam <- paste(instr,'..(noexo)..',sep='') - assign(resnam, projest$residuals[,instr,drop=FALSE], envir=noexo) + for (instr in instrvars) { + resnam <- paste(instr, "..(noexo)..", sep = "") + assign(resnam, projest$residuals[, instr, drop = FALSE], envir = noexo) } # ditch projest to save memory, we don't need it anymore rm(projest) @@ -361,23 +378,26 @@ oldcondfstat <- function(object, 
type='default') { # compute concentration parameter matrix mu # From Stock, Wright, Yogo section 4.2 p .522 # do this when I have the time to do it. - + # Then do the regression on the endog. residuals in resenv # on the lhs, and projected instruments from # projest on the rhs # make a formula # Should we do clustering? - resendog <- paste('`',lhsvars,'..(noexo)..`',sep='') - resinst <- paste('`',instrvars,'..(noexo)..`',sep='') - if(length(Form)[2] == 4) { - cluform <- formula(formula(Form,lhs=0,rhs=4))[[2]] - F1 <- as.Formula(formula(paste(paste(resendog,collapse='|'),'~', - paste(resinst,collapse='+'),'+0|0|0|0'))) - newform <- update(F1,as.formula(substitute(. ~ . |.|.|C, list(C=cluform)))) - + resendog <- paste("`", lhsvars, "..(noexo)..`", sep = "") + resinst <- paste("`", instrvars, "..(noexo)..`", sep = "") + if (length(Form)[2] == 4) { + cluform <- formula(formula(Form, lhs = 0, rhs = 4))[[2]] + F1 <- as.Formula(formula(paste( + paste(resendog, collapse = "|"), "~", + paste(resinst, collapse = "+"), "+0|0|0|0" + ))) + newform <- update(F1, as.formula(substitute(. ~ . | . | . | C, list(C = cluform)))) } else { - newform <- as.Formula(formula(paste(paste(resendog,collapse='|'),'~', - paste(resinst,collapse='+'),'+0'))) + newform <- as.Formula(formula(paste( + paste(resendog, collapse = "|"), "~", + paste(resinst, collapse = "+"), "+0" + ))) } # everything except the cluster var is in noexo @@ -385,16 +405,16 @@ oldcondfstat <- function(object, type='default') { # of noexo. We must call felm with the original data. Hopefully our # constructed names ..(residual).. 
are not present there environment(newform) <- noexo - m <- match(c('formula','data'), names(cl),0L) - newcl <- cl[c(1L,m)] - newcl[['formula']] <- newform - e1 <- eval(newcl,est$parent.frame) + m <- match(c("formula", "data"), names(cl), 0L) + newcl <- cl[c(1L, m)] + newcl[["formula"]] <- newform + e1 <- eval(newcl, est$parent.frame) - df1 <- length(instrvars)-length(lhsvars)+1 + df1 <- length(instrvars) - length(lhsvars) + 1 res <- sapply(e1$lhs, function(lh) { - waldtest(e1,resinst,lhs=lh, df1=df1, type=type)['F'] + waldtest(e1, resinst, lhs = lh, df1 = df1, type = type)["F"] }) - attr(res,'df1') <- df1 + attr(res, "df1") <- df1 names(res) <- lhsvars res } diff --git a/R/demeanlist.R b/R/demeanlist.R index c9df379..c331ecf 100644 --- a/R/demeanlist.R +++ b/R/demeanlist.R @@ -3,13 +3,13 @@ #' @description #' Uses the method of alternating projections to centre #' a (model) matrix on multiple groups, as specified by a list of factors. -#' This function is called by \code{\link{felm}}, but it has been +#' This function is called by [felm()], but it has been #' made available as standalone in case it's needed. In particular, if #' one does not need transformations provided by R-formulas but have the covariates present #' as a matrix or a data.frame, a substantial amount of time can be saved in the centering. #' @export -#\usage{ -#demeanlist(mtx, fl, icpt=0, eps=getOption('lfe.eps'), +# \usage{ +# demeanlist(mtx, fl, icpt=0, eps=getOption('lfe.eps'), # threads=getOption('lfe.threads'), # progress=getOption('lfe.pint'), # accel=getOption('lfe.accel'), @@ -18,7 +18,7 @@ # weights=NULL, # scale=TRUE, # .inplace=FALSE) -#} +# } #' @param mtx matrix whose columns form vectors to be group-centred. mtx #' can also be a list of vectors or matrices, such as a data frame. @@ -28,26 +28,26 @@ #' @param eps a tolerance for the centering. #' @param threads an integer specifying the number of threads to use. #' @param progress integer. 
If positive, make progress reports (whenever a -#' vector is centered, but not more often than every \code{progress} minutes). +#' vector is centered, but not more often than every `progress` minutes). #' @param accel integer. Set to 1 if Gearhart-Koshy acceleration should be done. #' @param randfact logical. Should the order of the factors be randomized? #' This may improve convergence. #' @param means logical. Should the means instead of the demeaned matrix be -#' returned? Setting \code{means=TRUE} will return \code{mtx - demeanlist(mtx,...)}, +#' returned? Setting `means=TRUE` will return `mtx - demeanlist(mtx,...)`, #' but without the extra copy. #' @param weights numeric. For weighted demeaning. #' @param scale logical. Specify scaling for weighted demeaning. -#' @param na.rm logical which indicates what should happen when the data -#' contain \code{NA}s. If TRUE, rows in the input \code{mtx} are removed +#' @param na.rm logical which indicates what should happen when the data +#' contain `NA`s. If TRUE, rows in the input `mtx` are removed #' prior to centering. If FALSE, they are kept, leading to entire groups becoming NA -#' in the output. -#' @param attrs list. List of attributes which should be attached to the output. +#' in the output. +#' @param attrs list. List of attributes which should be attached to the output. #' Used internally. #' #' @details -#' For each column \code{y} in \code{mtx}, the equivalent of the -#' following centering is performed, with \code{cy} as the result. -#' \preformatted{ +#' For each column `y` in `mtx`, the equivalent of the +#' following centering is performed, with `cy` as the result. 
+#' \preformatted{ #' cy <- y; oldy <- y-1 #' while(sqrt(sum((cy-oldy)**2)) >= eps) { #' oldy <- cy @@ -55,110 +55,110 @@ #' } #' } #' -#' Each factor in \code{fl} may contain an -#' attribute \code{'x'} which is a numeric vector of the same length as +#' Each factor in `fl` may contain an +#' attribute `'x'` which is a numeric vector of the same length as #' the factor. The centering is then not done on the means of each group, #' but on the projection onto the covariate in each group. That is, with a -#' covariate \code{x} and a factor \code{f}, it is like projecting out the -#' interaction \code{x:f}. The \code{'x'} attribute can also be a matrix of column +#' covariate `x` and a factor `f`, it is like projecting out the +#' interaction `x:f`. The `'x'` attribute can also be a matrix of column #' vectors, in this case it can be beneficial to orthogonalize the columns, #' either with a stabilized Gram-Schmidt method, or with the simple -#' method \code{x \%*\% solve(chol(crossprod(x)))}. +#' method `x \%*\% solve(chol(crossprod(x)))`. #' -#' The \code{weights} argument is used if a weighted projection is -#' computed. If \eqn{W} is the diagonal matrix with \code{weights} on the -#' diagonal, \code{demeanlist} computes \eqn{W^{-1} M_{WD} W x} where \eqn{x} is -#' the input vector, \eqn{D} is the matrix of dummies from \code{fl} and +#' The `weights` argument is used if a weighted projection is +#' computed. If \eqn{W} is the diagonal matrix with `weights` on the +#' diagonal, `demeanlist` computes \eqn{W^{-1} M_{WD} W x} where \eqn{x} is +#' the input vector, \eqn{D} is the matrix of dummies from `fl` and #' \eqn{M_{WD}} is the projection on the orthogonal complement of #' the range of the matrix \eqn{WD}. It is possible to implement the -#' weighted projection with the \code{'x'} attribute mentioned above, but +#' weighted projection with the `'x'` attribute mentioned above, but #' it is a separate argument for convenience. 
-#' If \code{scale=FALSE}, \code{demeanlist} computes \eqn{M_{WD} x} without +#' If `scale=FALSE`, `demeanlist` computes \eqn{M_{WD} x} without #' any \eqn{W} scaling. -#' If \code{length(scale) > 1}, then \code{scale[1]} specifies whether -#' the input should be scaled by \eqn{W}, and \code{scale[2]} specifies +#' If `length(scale) > 1`, then `scale[1]` specifies whether +#' the input should be scaled by \eqn{W}, and `scale[2]` specifies #' whether the output should be scaled by \eqn{W^{-1}}. This is just #' a convenience to save some memory copies in other functions in the package. -#' -#' Note that for certain large datasets the overhead in \code{\link{felm}} -#' is large compared to the time spent in \code{demeanlist}. If the data +#' +#' Note that for certain large datasets the overhead in [felm()] +#' is large compared to the time spent in `demeanlist`. If the data #' are present directly without having to use the formula-interface to -#' \code{felm} for transformations etc, it is possible to run -#' \code{demeanlist} directly on a matrix or \code{"data.frame"} and do the +#' `felm` for transformations etc, it is possible to run +#' `demeanlist` directly on a matrix or `"data.frame"` and do the #' OLS "manually", e.g. with something like -#' \code{cx <- demeanlist(x,...); beta <- solve(crossprod(cx), crossprod(cx,y))} -#' +#' `cx <- demeanlist(x,...); beta <- solve(crossprod(cx), crossprod(cx,y))` +#' #' In some applications it is known that a single centering iteration is -#' sufficient. In particular, if \code{length(fl)==1} and there is no -#' interaction attribute \code{x}. In this case the centering algorithm is +#' sufficient. In particular, if `length(fl)==1` and there is no +#' interaction attribute `x`. In this case the centering algorithm is #' terminated after the first iteration. There may be other cases, e.g. if -#' there is a single factor with a matrix \code{x} with orthogonal columns. 
If +#' there is a single factor with a matrix `x` with orthogonal columns. If #' you have such prior knowledge, it is possible to force termination after -#' the first iteration by adding an attribute \code{attr(fl, 'oneiter') <- -#' TRUE}. Convergence will be reached in the second iteration anyway, but +#' the first iteration by adding an attribute `attr(fl, 'oneiter') <- +#' TRUE`. Convergence will be reached in the second iteration anyway, but #' you save one iteration, i.e. you double the speed. -#' +#' #' @return -#' If \code{mtx} is a matrix, a matrix of the same shape, possibly with -#' column \code{icpt} deleted. -#' -#' If \code{mtx} is a list of vectors and matrices, a list of the same +#' If `mtx` is a matrix, a matrix of the same shape, possibly with +#' column `icpt` deleted. +#' +#' If `mtx` is a list of vectors and matrices, a list of the same #' length is returned, with the same vector and matrix-pattern, but the -#' matrices have the column \code{icpt} deleted. -#' -#' If \code{mtx} is a \code{'data.frame'}, a \code{'data.frame'} -#' with the same names is returned; the \code{icpt} argument is ignored. -#' -#' If \code{na.rm} is specified, the return value has an attribute \code{'na.rm'} with a vector of +#' matrices have the column `icpt` deleted. +#' +#' If `mtx` is a `'data.frame'`, a `'data.frame'` +#' with the same names is returned; the `icpt` argument is ignored. +#' +#' If `na.rm` is specified, the return value has an attribute `'na.rm'` with a vector of #' row numbers which has been removed. In case the input is a matrix or list, the same rows #' are removed from all of them. Note that removing NAs incurs a copy of the input, so if #' memory usage is an issue and many runs are done, one might consider removing NAs from the data set entirely. 
#' @note -#' The \code{accel} argument enables Gearhart-Koshy acceleration as +#' The `accel` argument enables Gearhart-Koshy acceleration as #' described in Theorem 3.16 by Bauschke, Deutsch, Hundal and Park in "Accelerating the #' convergence of the method of alternating projections", #' Trans. Amer. Math. Soc. 355 pp 3433-3461 (2003). #' -#' \code{demeanlist} will use an in place transform to save memory, provided the \code{mtx} +#' `demeanlist` will use an in place transform to save memory, provided the `mtx` #' argument is unnamed. Thus, as always in R, you shouldn't use temporary variables -#' like \code{tmp <- fun(x[v,]); bar <- demeanlist(tmp,...); rm(tmp)}, it will be much better to -#' do \code{bar <- demeanlist(fun(x[v,]),...)}. However, demeanlist allows a construction like -#' \code{bar <- demeanlist(unnamed(tmp),...)} which will use an in place transformation, i.e. tmp +#' like `tmp <- fun(x[v,]); bar <- demeanlist(tmp,...); rm(tmp)`, it will be much better to +#' do `bar <- demeanlist(fun(x[v,]),...)`. However, demeanlist allows a construction like +#' `bar <- demeanlist(unnamed(tmp),...)` which will use an in place transformation, i.e. tmp #' will be modified, quite contrary to the usual semantics of R. 
-#' +#' #' @examples -#' oldopts <- options(lfe.threads=1) +#' oldopts <- options("lfe.threads") +#' options(lfe.threads = 2) #' ## create a matrix -#' mtx <- data.frame(matrix(rnorm(999),ncol=3)) +#' mtx <- data.frame(matrix(rnorm(999), ncol = 3)) #' # a list of factors -#' rgb <- c('red','green','blue') -#' fl <- replicate(4, factor(sample(rgb,nrow(mtx),replace=TRUE)), simplify=FALSE) -#' names(fl) <- paste('g',seq_along(fl),sep='') +#' rgb <- c("red", "green", "blue") +#' fl <- replicate(4, factor(sample(rgb, nrow(mtx), replace = TRUE)), simplify = FALSE) +#' names(fl) <- paste("g", seq_along(fl), sep = "") #' # centre on all means -#' mtx0 <- demeanlist(mtx,fl) -#' head(data.frame(mtx0,fl)) +#' mtx0 <- demeanlist(mtx, fl) +#' head(data.frame(mtx0, fl)) #' # verify that the group means for the columns are zero -#' lapply(fl, function(f) apply(mtx0,2,tapply,f,mean)) +#' lapply(fl, function(f) apply(mtx0, 2, tapply, f, mean)) #' options(oldopts) -demeanlist <- function(mtx,fl,icpt=0L,eps=getOption('lfe.eps'), - threads=getOption('lfe.threads'), - progress=getOption('lfe.pint'), - accel=getOption('lfe.accel'), - randfact=TRUE, - means=FALSE, - weights=NULL, - scale=TRUE, - na.rm=FALSE, - attrs=NULL) { - - if(length(fl) == 0) { - if(means) { +demeanlist <- function(mtx, fl, icpt = 0L, eps = getOption("lfe.eps"), + threads = getOption("lfe.threads"), + progress = getOption("lfe.pint"), + accel = getOption("lfe.accel"), + randfact = TRUE, + means = FALSE, + weights = NULL, + scale = TRUE, + na.rm = FALSE, + attrs = NULL) { + if (length(fl) == 0) { + if (means) { foo <- unlist(utils::as.relistable(mtx)) foo[] <- 0 return(utils::relist(foo)) } return(eval.parent(substitute(mtx))) -# return(mtx) + # return(mtx) } # Here we used to just .Call(C_demeanlist, mtx, fl,...) 
@@ -169,69 +169,72 @@ demeanlist <- function(mtx,fl,icpt=0L,eps=getOption('lfe.eps'), mf <- match.call() ff <- formals(sys.function()) # This is the argument sent to C_demeanlist, in this order: - ff <- ff[match(c("mtx","fl","icpt","eps","threads","progress","accel","means","weights","scale","attrs"), - names(ff), 0L)] + ff <- ff[match( + c("mtx", "fl", "icpt", "eps", "threads", "progress", "accel", "means", "weights", "scale", "attrs"), + names(ff), 0L + )] m <- names(ff)[names(ff) %in% names(mf)] ff[m] <- mf[m] # make a new environment to store our reordered fl, enclosed by our caller's frame # This is just to avoid clutter in case of error messages - env <- new.env(parent=parent.frame()) - assign('C_demeanlist',C_demeanlist,envir=env) - assign('unnamed',unnamed,envir=env) - assign('.fl',eval.parent(ff[['fl']]), envir=env) + env <- new.env(parent = parent.frame()) + assign("C_demeanlist", C_demeanlist, envir = env) + assign("unnamed", unnamed, envir = env) + assign(".fl", eval.parent(ff[["fl"]]), envir = env) .fl <- NULL # avoid check warning - ff[['fl']] <- quote(.fl) - if(randfact && length(get('.fl',env)) > 2) { + ff[["fl"]] <- quote(.fl) + if (randfact && length(get(".fl", env)) > 2) { # This is delayed for the bizarre reason that before we rewrote this code # mtx was forced before reordering of fl, so we just keep it that way to avoid # changing the test output. 
- delayedAssign('..fl',.fl[order(runif(length(.fl)))],env,env) - ff[['fl']] <- quote(..fl) + delayedAssign("..fl", .fl[order(runif(length(.fl)))], env, env) + ff[["fl"]] <- quote(..fl) } # Then some NA-handling, at the request of David Hugh-Jones, June 11, 2018 badrows <- NULL delist <- FALSE isDT <- FALSE - if(isTRUE(na.rm)) { + if (isTRUE(na.rm)) { # we need to touch and copy the mtx and fl # mtx is either a vector, a matrix or a list (data.frame/data.table) of such - mtx <- eval.parent(ff[['mtx']]) - if(!is.list(mtx)) {mtx <- list(mtx); delist <- TRUE} + mtx <- eval.parent(ff[["mtx"]]) + if (!is.list(mtx)) { + mtx <- list(mtx) + delist <- TRUE + } # find bad rows in any of the elements of mtx badrows <- NULL - for(i in seq_along(mtx)) { + for (i in seq_along(mtx)) { object <- mtx[[i]] d <- dim(object) - if (length(d) > 2L) next + if (length(d) > 2L) next bad <- seq_along(object)[is.na(object)] if (length(bad) == 0L) next if (length(d)) { - bad <- unique(((bad - 1)%%d[1L]) + 1L) - } - badrows <- union(badrows,bad) + bad <- unique(((bad - 1) %% d[1L]) + 1L) + } + badrows <- union(badrows, bad) } - if(length(badrows) > 0) { + if (length(badrows) > 0) { isDF <- is.data.frame(mtx) - newmtx <- lapply(mtx,function(m) if(length(dim(m)) > 1) m[-badrows,,drop=FALSE] else m[-badrows]) + newmtx <- lapply(mtx, function(m) if (length(dim(m)) > 1) m[-badrows, , drop = FALSE] else m[-badrows]) rm(mtx) - if(isDF) newmtx <- as.data.frame(newmtx) - if(delist) newmtx <- newmtx[[1]] - fl <- eval(ff[['fl']],env) + if (isDF) newmtx <- as.data.frame(newmtx) + if (delist) newmtx <- newmtx[[1]] + fl <- eval(ff[["fl"]], env) N <- length(fl[[1]]) newfl <- lapply(fl, function(f) factor(f[-badrows])) - assign('.mtx',newmtx,envir=env) - assign('.fl',newfl,envir=env) - ff[['fl']] <- quote(.fl) + assign(".mtx", newmtx, envir = env) + assign(".fl", newfl, envir = env) + ff[["fl"]] <- quote(.fl) rm(newfl) - ff[['attrs']] <- c(ff[['attrs']],list(na.rm=sort(badrows))) - ff[['mtx']] <- 
quote(unnamed(.mtx)) + ff[["attrs"]] <- c(ff[["attrs"]], list(na.rm = sort(badrows))) + ff[["mtx"]] <- quote(unnamed(.mtx)) } else { - assign('.mtx',mtx,envir=env) - ff[['mtx']] <- quote(.mtx) + assign(".mtx", mtx, envir = env) + ff[["mtx"]] <- quote(.mtx) } - } eval(as.call(c(list(quote(.Call), quote(C_demeanlist)), ff)), env) } - diff --git a/R/efactory.R b/R/efactory.R index 5c3e3a0..55e5339 100644 --- a/R/efactory.R +++ b/R/efactory.R @@ -6,126 +6,136 @@ #' Create estimable function -#' +#' #' Creates an estimable function for a factor-structure. -#' -#' There are several possibilities for the input parameter \code{opt}. -#' \itemize{ \item \code{"ref"} yields an estimable function which is similar -#' to the default one in \code{\link{lm}}, one reference is forced to \code{0} -#' in each connected component. \item \code{"zm"} Similar to \code{"ref"}, but +#' +#' There are several possibilities for the input parameter `opt`. +#' \itemize{ \item `"ref"` yields an estimable function which is similar +#' to the default one in [lm()], one reference is forced to `0` +#' in each connected component. \item `"zm"` Similar to `"ref"`, but #' the factor which does not contain a reference is made to have zero mean, and -#' an intercept is added. \item \code{"zm2"} Similar to \code{"zm"}, but both -#' factors are made to have zero mean. \item \code{"ln"} Least norm function. +#' an intercept is added. \item `"zm2"` Similar to `"zm"`, but both +#' factors are made to have zero mean. \item `"ln"` Least norm function. #' This will yield the raw coefficients from the Kaczmarz-method, i.e. the #' solution with smallest norm. This function is not estimable. } Note that in #' the case with more than two factors, it is not known how to analyze the #' factors to find the structure of the rank-deficiencies, i.e. the estimable #' functions. In this case, the factors beyond the first two are assumed not #' to contribute to the rank-deficiency beyond a single dimension in each. 
-#' Both \code{"ref"} and \code{"zm"} keep one such reference at zero in each of +#' Both `"ref"` and `"zm"` keep one such reference at zero in each of #' these factors. This is the common method when using dummies. -#' +#' #' In the case that interactions are specified in the model, i.e. with -#' \code{x:f} in the second part of the formula, these terms are not analyzed -#' to create an estimable function. Only the pure \code{f} terms are used for -#' this purpose. It is assumed that the \code{x:f} terms are all identified. -#' Note that in this case, all the levels of \code{f} are included. -#' -#' @param obj object of class \code{"felm"}, usually, a result of a call to -#' \code{\link{felm}}. +#' `x:f` in the second part of the formula, these terms are not analyzed +#' to create an estimable function. Only the pure `f` terms are used for +#' this purpose. It is assumed that the `x:f` terms are all identified. +#' Note that in this case, all the levels of `f` are included. +#' +#' @param obj object of class `"felm"`, usually, a result of a call to +#' [felm()]. #' @param opt character. Which type of estimable function. #' @param ... various. -#' @return A function of two parameters \code{function(v,addnames)}. An -#' estimable function (i.e. the result is the vector of some length \code{N}) -#' of the input vector \code{v}. When \code{addnames==TRUE} the returned vector -#' should have names, and optionally an attribute \code{"extra"} which is a -#' list of vectors of length \code{N} which may be used to code additional +#' @return A function of two parameters `function(v,addnames)`. An +#' estimable function (i.e. the result is the vector of some length `N`) +#' of the input vector `v`. When `addnames==TRUE` the returned vector +#' should have names, and optionally an attribute `"extra"` which is a +#' list of vectors of length `N` which may be used to code additional #' information. #' @note The author is open to suggestions for other estimable functions, i.e. 
#' other useful normalizations of the solutions. -#' -#' It is not strictly necessary that the \code{obj} argument is of class -#' \code{"felm"}, any list with entries \code{"fe"} and \code{"cfactor"} of the -#' appropriate form will do. That is, \code{list(fe=fl,cfactor=compfactor(fl))} -#' where \code{fl} is the list of factors defining the component structure. -#' I.e. if the model is \code{y ~ ... |id + firm}, we have -#' \code{fl=list(id=id,firm=firm)}. +#' +#' It is not strictly necessary that the `obj` argument is of class +#' `"felm"`, any list with entries `"fe"` and `"cfactor"` of the +#' appropriate form will do. That is, `list(fe=fl,cfactor=compfactor(fl))` +#' where `fl` is the list of factors defining the component structure. +#' I.e. if the model is `y ~ ... |id + firm`, we have +#' `fl=list(id=id,firm=firm)`. #' @examples -#' -#' oldopts <- options(lfe.threads=1) -#' id <- factor(sample(5000,50000,replace=TRUE)) -#' firm <- factor(sample(3000,50000,replace=TRUE)) -#' fl <- list(id=id,firm=firm) -#' obj <- list(fe=fl,cfactor=compfactor(fl)) +#' +#' oldopts <- options("lfe.threads") +#' options(lfe.threads = 2) +#' id <- factor(sample(5000, 50000, replace = TRUE)) +#' firm <- factor(sample(3000, 50000, replace = TRUE)) +#' fl <- list(id = id, firm = firm) +#' obj <- list(fe = fl, cfactor = compfactor(fl)) #' ## the trivial least-norm transformtion, which by the way is non-estimable -#' print(ef <- efactory(obj,'ln')) -#' is.estimable(ef,fl) +#' print(ef <- efactory(obj, "ln")) +#' is.estimable(ef, fl) #' ## then the default -#' print(ef <- efactory(obj,'ref')) -#' is.estimable(ef,fl) +#' print(ef <- efactory(obj, "ref")) +#' is.estimable(ef, fl) #' # get the names of the coefficients, i.e. the nm-variable in the function -#' head(evalq(nm,environment(ef))) +#' head(evalq(nm, environment(ef))) #' options(oldopts) -#' +#' #' @export efactory -efactory <- function(obj, opt='ref', ...) { - +efactory <- function(obj, opt = "ref", ...) 
{ # only factors without covariates are relevant to analyze - purefes <- sapply(obj$fe, function(f) is.null(attr(f,'x',exact=TRUE))) + purefes <- sapply(obj$fe, function(f) is.null(attr(f, "x", exact = TRUE))) pfe <- obj$fe[purefes] -# allnm <- unlist(lapply(names(obj$fe),function(n) paste(n,levels(obj$fe[[n]]),sep='\003'))) - allnm <- unlist(lapply(names(obj$fe),function(n) xlevels(n,obj$fe[[n]],sep='\003'))) - -# the names of the dummies, e.g. id.4 firm.23 -# nm <- unlist(lapply(names(pfe),function(n) paste(n,levels(pfe[[n]]),sep='\003'))) - nm <- unlist(lapply(names(pfe),function(n) xlevels(n,pfe[[n]],sep='\003'))) + # allnm <- unlist(lapply(names(obj$fe),function(n) paste(n,levels(obj$fe[[n]]),sep='\003'))) + allnm <- unlist(lapply(names(obj$fe), function(n) xlevels(n, obj$fe[[n]], sep = "\003"))) + + # the names of the dummies, e.g. id.4 firm.23 + # nm <- unlist(lapply(names(pfe),function(n) paste(n,levels(pfe[[n]]),sep='\003'))) + nm <- unlist(lapply(names(pfe), function(n) xlevels(n, pfe[[n]], sep = "\003"))) # create an index where the pure fe's belong in the full array - allpos <- match(nm,allnm) + allpos <- match(nm, allnm) - mkallvec <- function(x) {res <- rep(NA,length(allnm)); res[allpos] <- allpos[x]; res;} -# how many obervations for each level - lobs <- lapply(pfe,table) - obs <- unlist(lobs) - names(obs) <- unlist(lapply(names(lobs), function(n) paste(n,'\003',names(lobs[[n]]),sep=''))) + mkallvec <- function(x) { + res <- rep(NA, length(allnm)) + res[allpos] <- allpos[x] + res + } + # how many obervations for each level + lobs <- lapply(pfe, table) + obs <- unlist(lobs) + names(obs) <- unlist(lapply(names(lobs), function(n) paste(n, "\003", names(lobs[[n]]), sep = ""))) -# allobs <- unlist(lapply(obj$fe,table)) - allobs <- unlist(lapply(obj$fe,function(f) { - x <- attr(f,'x',exact=TRUE) - if(is.null(x)) return(table(f)) - if(!is.matrix(x)) return(table(f)) + # allobs <- unlist(lapply(obj$fe,table)) + allobs <- unlist(lapply(obj$fe, function(f) { + 
x <- attr(f, "x", exact = TRUE) + if (is.null(x)) { + return(table(f)) + } + if (!is.matrix(x)) { + return(table(f)) + } return(rep(table(f), ncol(x))) - })) + })) - if(length(pfe) == 2) { + if (length(pfe) == 2) { # now, find the component of each parameter, i.e. each level. We do this # by finding the first occurence of each level, i.e. match(levels(f),f) - comp <- factor(unlist(lapply(pfe, function(f) obj$cfactor[match(levels(f),f)]))) + comp <- factor(unlist(lapply(pfe, function(f) obj$cfactor[match(levels(f), f)]))) ncomp <- nlevels(comp) - } else if(length(pfe) > 2) { + } else if (length(pfe) > 2) { # we should formally assign unique component numbers for factors beyond the second - comp <- factor(unlist(lapply(pfe[1:2], function(f) obj$cfactor[match(levels(f),f)]))) + comp <- factor(unlist(lapply(pfe[1:2], function(f) obj$cfactor[match(levels(f), f)]))) ncomp <- nlevels(comp) - exlvls <- (nlevels(comp)+1):(nlevels(comp)+1 + length(pfe)-3) - comp <- as.factor(c(comp,unlist(mapply(rep,exlvls,unlist(lapply(pfe[3:length(pfe)],nlevels)))))) + exlvls <- (nlevels(comp) + 1):(nlevels(comp) + 1 + length(pfe) - 3) + comp <- as.factor(c(comp, unlist(mapply(rep, exlvls, unlist(lapply(pfe[3:length(pfe)], nlevels)))))) } else { - comp <- factor(rep(1,length(obs))) + comp <- factor(rep(1, length(obs))) ncomp <- 1 } - if(length(pfe) == 0) { - nm <- unlist(lapply(names(obj$fe),function(n) xlevels(n,obj$fe[[n]],sep='.'))) - return(function(v,addnames) { - if(!addnames) return(v) + if (length(pfe) == 0) { + nm <- unlist(lapply(names(obj$fe), function(n) xlevels(n, obj$fe[[n]], sep = "."))) + return(function(v, addnames) { + if (!addnames) { + return(v) + } names(v) <- nm v }) } - refnames <- unlist(tapply(obs,comp,function(l) names(which.max(l)))) + refnames <- unlist(tapply(obs, comp, function(l) names(which.max(l)))) # now v[refnames] will be the reference values - refno <- match(refnames,nm) + refno <- match(refnames, nm) refsub <- refno[comp] # refsub is a vector, in entry 
i it contains the reference for entry i # i.e. we should do a v <- v - v[refsub] @@ -139,35 +149,37 @@ efactory <- function(obj, opt='ref', ...) { # so which ones belong to which factor? # make a factor to decide - fef <- factor(unlist(lapply(names(pfe),function(n) rep(n,nlevels(pfe[[n]]))))) -# allfef <- factor(unlist(lapply(names(obj$fe),function(n) rep(n,nlevels(obj$fe[[n]]))))) - allfef <- factor(unlist(lapply(names(obj$fe),function(n) nxlevels(n,obj$fe[[n]])))) + fef <- factor(unlist(lapply(names(pfe), function(n) rep(n, nlevels(pfe[[n]]))))) + # allfef <- factor(unlist(lapply(names(obj$fe),function(n) rep(n,nlevels(obj$fe[[n]]))))) + allfef <- factor(unlist(lapply(names(obj$fe), function(n) nxlevels(n, obj$fe[[n]])))) # level of the factor -# idx <- factor(unlist(lapply(obj$fe,function(f) levels(f)))) - idx <- factor(unlist(lapply(obj$fe,function(f) { - x <- attr(f,'x',exact=TRUE) - if(is.null(x) || !is.matrix(x)) return(levels(f)) + # idx <- factor(unlist(lapply(obj$fe,function(f) levels(f)))) + idx <- factor(unlist(lapply(obj$fe, function(f) { + x <- attr(f, "x", exact = TRUE) + if (is.null(x) || !is.matrix(x)) { + return(levels(f)) + } return(rep(levels(f), ncol(x))) }))) # then figure out in which factor the reference is # make sure to allow '.' in factor names - rf <- sub('(^.*)\003..*$','\\1',refnames) + rf <- sub("(^.*)\003..*$", "\\1", refnames) # now, create a refsubs which is the ones to be subtracted # each refsub belonging to somthing else than the reference factor # should be NA'ed. 
- if(length(pfe) > 2) { - extra <- (length(refno)-length(pfe)+3):length(refno) - sw <- c(names(pfe)[c(2,1)],rep('.NA',length(pfe)-2)) + if (length(pfe) > 2) { + extra <- (length(refno) - length(pfe) + 3):length(refno) + sw <- c(names(pfe)[c(2, 1)], rep(".NA", length(pfe) - 2)) } else { - swap <- if(length(pfe) == 2) c(2,1) else 1 + swap <- if (length(pfe) == 2) c(2, 1) else 1 sw <- names(pfe)[swap] extra <- integer(0) } names(sw) <- names(pfe) otherf <- sw[rf] - # which should we not subtract? - # Those which are + # which should we not subtract? + # Those which are nosub <- fef != rf[comp] refsubs <- refsub refsubs[nosub] <- NA @@ -183,9 +195,9 @@ efactory <- function(obj, opt='ref', ...) { # there are two variants, either centre on the means in # both factors, or only in the one without a reference # we create a factor zfact which describes the groups - # to mean. + # to mean. + - # create a minimal environment for a function extrarefs <- allpos[extrarefs] @@ -193,129 +205,129 @@ efactory <- function(obj, opt='ref', ...) 
{ refsuba <- mkallvec(refsuba) obs <- allobs fef <- allfef -# nm <- unlist(lapply(names(obj$fe),function(n) paste(n,levels(obj$fe[[n]]),sep='.'))) - nm <- unlist(lapply(names(obj$fe),function(n) xlevels(n,obj$fe[[n]],sep='.'))) -# allcomp <- rep(0,sum(sapply(obj$fe,nlevels))) - allcomp <- rep(0,length(allnm)) + # nm <- unlist(lapply(names(obj$fe),function(n) paste(n,levels(obj$fe[[n]]),sep='.'))) + nm <- unlist(lapply(names(obj$fe), function(n) xlevels(n, obj$fe[[n]], sep = "."))) + # allcomp <- rep(0,sum(sapply(obj$fe,nlevels))) + allcomp <- rep(0, length(allnm)) allcomp[allpos] <- comp comp <- allcomp - fenv <- list(extrarefs=extrarefs,refsubs=refsubs,refsuba=refsuba,fef=fef,nm=nm) + fenv <- list(extrarefs = extrarefs, refsubs = refsubs, refsuba = refsuba, fef = fef, nm = nm) ef <- switch(as.character(opt), - ln={ - local(function(v,addnames) { - if(addnames) { - names(v) <- nm - attr(v,'extra') <- list(obs=obs,comp=comp,fe=fef,idx=idx) - } - v - },list(obs=obs,comp=comp,fe=fef,idx=idx,nm=nm)) - }, - # one reference in each component - ref={ - fenv$comp <- comp - fenv$mkallvec <- mkallvec - local(function(v,addnames) { - esum <- sum(v[extrarefs]) - df <- v[refsubs] - sub <- ifelse(is.na(df),0,df) - df <- v[refsuba] - add <- ifelse(is.na(df),0,df+esum) - v <- v - sub + add - if(addnames) { - names(v) <- nm - attr(v,'extra') <- list(obs=obs,comp=comp,fe=fef,idx=idx) - } - v - },fenv) - }, - zm={ - # now, what if we want zero-means on the other-factor? - # we will then get an intercept for each component, and - # zero means. It's those which are in nosub, but partitioned - # into components. 
We may do this faster now that we've - # separated it from the ordinary 'ref' - zfact <- comp - zfact[!nosub] <- NA - - enames <- paste('icpt',1:ncomp,sep='.') - zcomp <- factor(c(comp,1:ncomp)) - oo <- order(zcomp) - fenv$oo <- oo - fenv$zfact <- zfact - fenv$zcomp <- zcomp[oo] - fenv$enames <- enames - fenv$obs <- c(obs,table(obj$cfactor))[oo] - ef <- local(function(v,addnames) { - esum <- sum(v[extrarefs]) - df <- v[refsubs] - sub <- ifelse(is.na(df),0,df) - df <- v[refsuba] - add <- ifelse(is.na(df),0,df+esum) - v <- v - sub + add - means <- tapply(v,zfact,mean) - mn <- means[zfact] - mn <- ifelse(is.na(mn),0,mn) - v <- v - mn - v <- c(v,means)[oo] - if(addnames) { - names(v) <- c(nm,enames)[oo] - attr(v,'extra') <- list(obs=obs,comp=zcomp) - } - v - },fenv) - }, - # one reference in each component, but zero-means in the other factor - # and an intercept - zm2={ - # both factors (but not the extra factors): - # the interaction between comp and fef forms these groups, - zfact <- interaction(comp,fef) - # but skip the extra factors - zfact[as.integer(comp) > ncomp] <- NA - zfact <- factor(zfact) - # and the means should be added to the intercepts - # i.e. from the same component, add two and two - # ifact should consist of the components of the - # levels of zfact. 
- ifact <- factor(as.integer(gsub('^([0-9]+).*','\\1',levels(zfact))),exclude=NA) - enames <- paste('icpt',1:ncomp,sep='.') - zcomp <- factor(c(comp,1:ncomp)) - oo <- order(zcomp) - fenv$oo <- oo - fenv$zfact <- zfact - fenv$zcomp <- zcomp[oo] - fenv$enames <- enames - fenv$obs <- c(obs,table(obj$cfactor))[oo] - fenv$ifact <- ifact - ef <- local(function(v,addnames) { - esum <- sum(v[extrarefs]) - df <- v[refsubs] - sub <- ifelse(is.na(df),0,df) - df <- v[refsuba] - add <- ifelse(is.na(df),0,df+esum) - v <- v - sub + add - means <- tapply(v,zfact,mean) - mn <- means[zfact] - mn <- ifelse(is.na(mn),0,mn) - v <- v - mn - icpt <- tapply(means,ifact,sum) - v <- c(v,icpt)[oo] - if(addnames) { - names(v) <- c(nm,enames)[oo] - attr(v,'extra') <- list(obs=obs,comp=zcomp) - } - v - },fenv) - }, + ln = { + local(function(v, addnames) { + if (addnames) { + names(v) <- nm + attr(v, "extra") <- list(obs = obs, comp = comp, fe = fef, idx = idx) + } + v + }, list(obs = obs, comp = comp, fe = fef, idx = idx, nm = nm)) + }, + # one reference in each component + ref = { + fenv$comp <- comp + fenv$mkallvec <- mkallvec + local(function(v, addnames) { + esum <- sum(v[extrarefs]) + df <- v[refsubs] + sub <- ifelse(is.na(df), 0, df) + df <- v[refsuba] + add <- ifelse(is.na(df), 0, df + esum) + v <- v - sub + add + if (addnames) { + names(v) <- nm + attr(v, "extra") <- list(obs = obs, comp = comp, fe = fef, idx = idx) + } + v + }, fenv) + }, + zm = { + # now, what if we want zero-means on the other-factor? + # we will then get an intercept for each component, and + # zero means. It's those which are in nosub, but partitioned + # into components. 
We may do this faster now that we've + # separated it from the ordinary 'ref' + zfact <- comp + zfact[!nosub] <- NA - stop(paste('estimable function',opt,'not recognized')) - ) + enames <- paste("icpt", 1:ncomp, sep = ".") + zcomp <- factor(c(comp, 1:ncomp)) + oo <- order(zcomp) + fenv$oo <- oo + fenv$zfact <- zfact + fenv$zcomp <- zcomp[oo] + fenv$enames <- enames + fenv$obs <- c(obs, table(obj$cfactor))[oo] + ef <- local(function(v, addnames) { + esum <- sum(v[extrarefs]) + df <- v[refsubs] + sub <- ifelse(is.na(df), 0, df) + df <- v[refsuba] + add <- ifelse(is.na(df), 0, df + esum) + v <- v - sub + add + means <- tapply(v, zfact, mean) + mn <- means[zfact] + mn <- ifelse(is.na(mn), 0, mn) + v <- v - mn + v <- c(v, means)[oo] + if (addnames) { + names(v) <- c(nm, enames)[oo] + attr(v, "extra") <- list(obs = obs, comp = zcomp) + } + v + }, fenv) + }, + # one reference in each component, but zero-means in the other factor + # and an intercept + zm2 = { + # both factors (but not the extra factors): + # the interaction between comp and fef forms these groups, + zfact <- interaction(comp, fef) + # but skip the extra factors + zfact[as.integer(comp) > ncomp] <- NA + zfact <- factor(zfact) + # and the means should be added to the intercepts + # i.e. from the same component, add two and two + # ifact should consist of the components of the + # levels of zfact. 
+ ifact <- factor(as.integer(gsub("^([0-9]+).*", "\\1", levels(zfact))), exclude = NA) + enames <- paste("icpt", 1:ncomp, sep = ".") + zcomp <- factor(c(comp, 1:ncomp)) + oo <- order(zcomp) + fenv$oo <- oo + fenv$zfact <- zfact + fenv$zcomp <- zcomp[oo] + fenv$enames <- enames + fenv$obs <- c(obs, table(obj$cfactor))[oo] + fenv$ifact <- ifact + ef <- local(function(v, addnames) { + esum <- sum(v[extrarefs]) + df <- v[refsubs] + sub <- ifelse(is.na(df), 0, df) + df <- v[refsuba] + add <- ifelse(is.na(df), 0, df + esum) + v <- v - sub + add + means <- tapply(v, zfact, mean) + mn <- means[zfact] + mn <- ifelse(is.na(mn), 0, mn) + v <- v - mn + icpt <- tapply(means, ifact, sum) + v <- c(v, icpt)[oo] + if (addnames) { + names(v) <- c(nm, enames)[oo] + attr(v, "extra") <- list(obs = obs, comp = zcomp) + } + v + }, fenv) + }, + stop(paste("estimable function", opt, "not recognized")) + ) -# try to byte compile the stuff - ef <- compiler::cmpfun(ef,list(optimize=3)) - if(length(pfe) <= 2 && as.character(opt) != 'ln' && all(purefes)) - attr(ef,'verified') <- TRUE + # try to byte compile the stuff + ef <- compiler::cmpfun(ef, list(optimize = 3)) + if (length(pfe) <= 2 && as.character(opt) != "ln" && all(purefes)) { + attr(ef, "verified") <- TRUE + } ef -} +} diff --git a/R/feglm.R b/R/feglm.R new file mode 100644 index 0000000..fb3d24f --- /dev/null +++ b/R/feglm.R @@ -0,0 +1,255 @@ +#' @title Fit a Poisson model with multiple group fixed effects +#' @inheritParams felm +#' @param offset this can be used to specify an \emph{a priori} known component +#' to be included in the linear predictor during fitting. This should be +#' \code{NULL} or a numeric vector or matrix of extents matching those of the +#' response. One or more \code{\link{offset}} terms can be included in the +#' formula instead or as well, and if more than one are specified their sum is +#' used. See \code{\link{model.offset}}. +#' @param robust logical value to return a robust standard error computation. 
+#' @param cluster optional variable to group by and compute sandwich-type +#' robust standard errors. Should be a formula of the form `~x_j` or +#' an object that be coerced to a formula. +#' @param tol tolerance value for GLM convergence criteria. +#' @importFrom Formula Formula +#' @importFrom Matrix Diagonal +#' @importFrom methods is +#' @seealso felm +#' @export fepois +fepois <- function(formula, data, + offset = NULL, + subset = NULL, + robust = TRUE, + cluster = NULL, + tol = 1e-10) { + if (!is.null(subset)) { data <- data[subset, ] } + + if (is.character(formula)) { formula <- as.formula(formula) } + if (is.character(cluster)) { cluster <- as.formula(paste0("~", cluster)) } + if (is.character(offset)) { offset <- as.formula(paste0("~", offset)) } + + formula <- Formula(formula) + + offset2 <- offset + + if (is.null(offset)) { + offset <- rep(0, nrow(data)) + } else { + offset <- data[[all.vars(formula(offset, lhs = 1, rhs = 0))]] + } + + vardep <- all.vars(formula(formula, lhs = 1, rhs = 0)) + vardep <- data[, vardep, drop = TRUE] + + if (min(vardep) < 0) { + stop("y should be greater or equals to zero.") + } + + fe <- all.vars(formula(formula, lhs = 0, rhs = 2)) + + for (f in fe) { + if (!is(data[, f], "factor")) { + data[, f] <- as.factor(data[, f, drop = TRUE]) + } + } + + max_vardep <- max(vardep, na.rm = TRUE) + vardep <- vardep / max_vardep + + mu <- (vardep + 0.5) / 2 + eta <- log(mu) - offset + z <- eta + (vardep - mu) / mu + + # Formula + + varind <- all.vars(formula(formula, lhs = 0, rhs = 1)) + + formula <- as.formula(paste0( + "z ~ ", ifelse(is.null(varind), " -1 ", paste0(varind, collapse = " + ")), " | ", + paste0(fe, collapse = " + ") + )) + + dif <- 1 + rss1 <- 1 + + while (abs(dif) > tol) { + reg <- felm(formula = formula, data = data, weights = mu) + + eta <- z - reg$residuals + offset + mu <- exp(eta) + z <- (eta - offset) + (vardep - mu) / mu + + res <- vardep - mu + rss2 <- sum(res^2) + dif <- rss2 - rss1 + rss1 <- rss2 + dev <- 2 * 
max_vardep * sum(vardep[vardep > 0] * log(vardep[vardep > 0] / mu[vardep > 0])) + } + + z <- z + log(max_vardep) + reg <- felm(formula = formula, data = data, weights = mu) + + if (!is.null(varind)) { + z <- data.frame(id = seq_len(nrow(data))) + for (i in varind) { + fe_tmp <- paste0(fe, collapse = " + ") + formula_tmp <- as.formula(paste0( + i, " ~ -1 ", + ifelse(!is.null(offset2), " + offset ", ""), + "| ", fe_tmp + )) + fit.tmp <- felm(formula = formula_tmp, data = data, weights = mu) + z[[i]] <- fit.tmp$residuals + } + z <- z[, -1] + z <- as.matrix(z) + + n <- reg$N + k <- length(varind) + W1 <- Diagonal(mu, n = n) + bread <- solve(t(z) %*% W1 %*% z) + + res <- vardep - mu + + if (robust) { + if (is.null(cluster)) { + W2 <- Diagonal((res^2), n = n) + meat <- t(z) %*% W2 %*% z + } else { + cluster <- data[[all.vars(formula(cluster, lhs = 1, rhs = 0))]] + m <- length(unique(cluster)) + dfc <- (m / (m - 1)) * ((n - 1) / (n - k)) + + meat <- matrix(0, nrow = length(varind), ncol = length(varind)) + + for (i in unique(cluster)) { + z_tmp <- as.matrix(z[cluster == i,]) + res_tmp <- res[cluster == i] + W2_tmp <- res_tmp %*% t(res_tmp) + meat <- meat + (t(z_tmp) %*% W2_tmp %*% z_tmp) + } + } + } + + vcov <- if (robust) { + bread %*% meat %*% bread + } else { + bread + } + + reg$vcv <- vcov + reg$se <- sqrt(diag(reg$vcv)) + reg$tval <- reg$coefficients / reg$se + reg$pval <- 1 - pnorm(abs(reg$tval)) + + if (robust) { + reg$vcv <- vcov + reg$se <- sqrt(diag(reg$vcv)) + reg$tval <- reg$coefficients / reg$se + reg$pval <- 1 - pnorm(abs(reg$tval)) + + reg$robustvcv <- vcov + reg$rse <- sqrt(diag(reg$robustvcv)) + reg$rtval <- reg$coefficients / reg$rse + reg$rpval <- 1 - pnorm(abs(reg$rtval)) + } + } + + x_fe <- data[, fe, drop = FALSE] + x_fe$order <- 1:nrow(x_fe) + len_fe <- length(fe) + + for (i in seq_len(len_fe)) { + fe_tmp <- getfe(reg) + fe_tmp <- fe_tmp[fe_tmp$fe == fe[i], c("idx", "effect")] + + colnames(fe_tmp) <- c(fe[i], paste0("fe_", fe[i])) + x_fe <- 
merge(x_fe, fe_tmp, by = fe[i], all.x = TRUE) + } + x_fe <- x_fe[order(x_fe$order), -(len_fe + 1)] + x_fe[, seq_len(len_fe)] <- sapply(x_fe[, seq_len(len_fe)], as.character) + reg$fixed.effects <- x_fe + + x_fe <- x_fe[, !names(x_fe) %in% fe, drop = FALSE] + x_fe <- apply(x_fe, 1, sum) + + if (!is.null(varind)) { + reg$fitted.values <- as.numeric(exp(as.matrix(data[, rownames(reg$coefficients)]) %*% reg$coefficients + offset + x_fe)) + } else { + reg$fitted.values <- as.numeric(exp(offset + x_fe)) + } + + names(reg$fitted.values) <- rownames(data) + + class(reg) <- "fepois" + return(reg) +} + +#' @exportS3Method +summary.fepois <- function(object, ...) { + class(object) <- "summary.fepois" + return(object) +} + +#' @exportS3Method +print.summary.fepois <- function(x, ...) { + cat("Coefficients: \n") + results <- data.frame( + Estimate = x$coefficients, + `Std. Error` = x$se, + `t value` = x$tval, + `Pr(>|t|)` = x$pval + ) + results <- as.matrix(results) + colnames(results) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") + return(printCoefmat(results, digits = 4)) +} + +#' @exportS3Method +predict.fepois <- function(object, newdata = NULL, offset = NULL, type = "link", ...) 
{ + stopifnot(any(type %in% c("link", "response"))) + + if (is.null(offset)) offset <- rep(0, nrow(newdata)) + + fe <- names(object$fe) + x_fe <- newdata[, fe, drop = FALSE] + x_fe$order <- 1:nrow(x_fe) + len_fe <- length(fe) + + for (i in 1:len_fe) { + fe_tmp <- getfe(object) + fe_tmp <- fe_tmp[fe_tmp$fe == fe[i], c("idx", "effect")] + + colnames(fe_tmp) <- c(fe[i], paste0("fe_", fe[i])) + + x_fe <- merge(x_fe, fe_tmp, by = fe[i], all.x = TRUE) + } + + x_fe <- x_fe[order(x_fe$order), -(len_fe + 1)] + + x_fe[, seq_len(len_fe)] <- sapply(x_fe[, 1:len_fe], as.character) + object$fixed.effects <- x_fe + + x_fe <- x_fe[, !names(x_fe) %in% fe, drop = FALSE] + x_fe <- apply(x_fe, 1, sum) + + x <- rownames(object$beta) + + # return x_beta + if (!is.null(x)) { + out <- as.matrix(newdata[, x]) %*% object$coefficients + offset + x_fe + } else { + out <- offset + x_fe + } + + if (type == "link") { + out <- as.numeric(out) + } + if (type == "response") { + out <- as.numeric(exp(out)) + } + + names(out) <- rownames(newdata) + + return(out) +} diff --git a/R/felm.R b/R/felm.R index 9b408b1..d7a6481 100644 --- a/R/felm.R +++ b/R/felm.R @@ -5,16 +5,16 @@ -makematrix <- function(mf, contrasts=NULL, pf=parent.frame(), - clustervar=NULL, wildcard='n', onlymm=FALSE) { +makematrix <- function(mf, contrasts = NULL, pf = parent.frame(), + clustervar = NULL, wildcard = "n", onlymm = FALSE) { m <- match(c("formula", "data", "subset", "na.action"), names(mf), 0L) - wpos <- which(!is.na(pmatch(names(mf),'weights'))) - if(length(wpos) > 0) { - weights <- eval(mf[[wpos]],pf) - if(!is.null(weights)) { - if(anyNA(weights) || any(weights < 0)) stop('missing or negative weights not allowed') + wpos <- which(!is.na(pmatch(names(mf), "weights"))) + if (length(wpos) > 0) { + weights <- eval(mf[[wpos]], pf) + if (!is.null(weights)) { + if (anyNA(weights) || any(weights < 0)) stop("missing or negative weights not allowed") weights <- sqrt(weights) - weights[weights==0] <- 1e-60 + weights[weights == 0] 
<- 1e-60 } } else { weights <- NULL @@ -29,7 +29,7 @@ makematrix <- function(mf, contrasts=NULL, pf=parent.frame(), # sides, just to get hold of the rhs. Then we extract the left hand side # We need to remove the iv-spec from the Formula. It requires its own specification - Form <- eval(mf[['formula']], pf) + Form <- eval(mf[["formula"]], pf) formenv <- environment(Form) Form <- as.Formula(Form) @@ -38,96 +38,109 @@ makematrix <- function(mf, contrasts=NULL, pf=parent.frame(), numrhs <- length(Form)[2] # we can't just dot-update the iv-part, update will only keep the instruments - if(numrhs < 2) Form <- update(Form, . ~ . | 0 | 0 | 0 | 0, drop=FALSE) - else if(numrhs < 3) Form <- update(Form, . ~ . | . | 0 | 0 | 0 , drop=FALSE) - else if(numrhs < 4) { + if (numrhs < 2) { + Form <- update(Form, . ~ . | 0 | 0 | 0 | 0, drop = FALSE) + } else if (numrhs < 3) { + Form <- update(Form, . ~ . | . | 0 | 0 | 0, drop = FALSE) + } else if (numrhs < 4) { # build from parts - Form <- as.Formula(do.call(substitute, list(L ~ R1 | R2 | R3 | 0 | 0, - list(L=formula(Form,lhs=NULL,rhs=0)[[2]], - R1=formula(Form,lhs=0,rhs=1)[[2]], - R2=formula(Form,lhs=0,rhs=2)[[2]], - R3=formula(Form,lhs=0,rhs=3)[[2]])))) - } else if(numrhs < 5) { - Form <- as.Formula(do.call(substitute, list(L ~ R1 | R2 | R3 | R4 | 0, - list(L=formula(Form,lhs=NULL,rhs=0)[[2]], - R1=formula(Form,lhs=0,rhs=1)[[2]], - R2=formula(Form,lhs=0,rhs=2)[[2]], - R3=formula(Form,lhs=0,rhs=3)[[2]], - R4=formula(Form,lhs=0,rhs=4)[[2]])))) - + Form <- as.Formula(do.call(substitute, list( + L ~ R1 | R2 | R3 | 0 | 0, + list( + L = formula(Form, lhs = NULL, rhs = 0)[[2]], + R1 = formula(Form, lhs = 0, rhs = 1)[[2]], + R2 = formula(Form, lhs = 0, rhs = 2)[[2]], + R3 = formula(Form, lhs = 0, rhs = 3)[[2]] + ) + ))) + } else if (numrhs < 5) { + Form <- as.Formula(do.call(substitute, list( + L ~ R1 | R2 | R3 | R4 | 0, + list( + L = formula(Form, lhs = NULL, rhs = 0)[[2]], + R1 = formula(Form, lhs = 0, rhs = 1)[[2]], + R2 = formula(Form, 
lhs = 0, rhs = 2)[[2]], + R3 = formula(Form, lhs = 0, rhs = 3)[[2]], + R4 = formula(Form, lhs = 0, rhs = 4)[[2]] + ) + ))) } - if(numrhs > 5) stop("Formula can't have more than 5 parts") -# Make a suitable formula for a model frame. No tricky IV-spec -# fullF <- formula(Form,lhs=NULL,rhs=0, drop=FALSE,collapse=TRUE,update=TRUE) - fullF <- formula(Form,lhs=NULL,rhs=0, drop=FALSE) - for(i in seq_len(length(Form)[2])) { - f <- formula(Form,lhs=0,rhs=i,drop=FALSE)[[2]] - if(i == 3) { - if(identical(f,0)) next + if (numrhs > 5) stop("Formula can't have more than 5 parts") + # Make a suitable formula for a model frame. No tricky IV-spec + # fullF <- formula(Form,lhs=NULL,rhs=0, drop=FALSE,collapse=TRUE,update=TRUE) + fullF <- formula(Form, lhs = NULL, rhs = 0, drop = FALSE) + for (i in seq_len(length(Form)[2])) { + f <- formula(Form, lhs = 0, rhs = i, drop = FALSE)[[2]] + if (i == 3) { + if (identical(f, 0)) next f <- as.Formula(f[[2]]) # skip '(' - f <- formula(f,collapse=TRUE, drop=FALSE) - fullF <- update(fullF, formula(substitute(. ~ . + F1+F2, list(F1=f[[2]], F2=f[[3]]))), drop=FALSE, collapse=TRUE) + f <- formula(f, collapse = TRUE, drop = FALSE) + fullF <- update(fullF, formula(substitute(. ~ . + F1 + F2, list(F1 = f[[2]], F2 = f[[3]]))), drop = FALSE, collapse = TRUE) } else { - fullF <- update(fullF, formula(substitute(. ~ . + F, list(F=f))),drop=FALSE) + fullF <- update(fullF, formula(substitute(. ~ . + F, list(F = f))), drop = FALSE) } } - usewild <- !identical(wildcard,'n') - dataenv <- new.env(parent=pf) - if(usewild) { + usewild <- !identical(wildcard, "n") + dataenv <- new.env(parent = pf) + if (usewild) { # we must evalaute the data argument, but we want to # avoid it being reevaluated when we eval(mf), # so put it in an environment. We do it like this # to have a short name in mf[['data']] in case of errors. 
- data <- eval(mf[['data']],pf) - assign('..(@DATA@)..',data,dataenv) - mf[['data']] <- as.name('..(@DATA@)..') + data <- eval(mf[["data"]], pf) + assign("..(@DATA@)..", data, dataenv) + mf[["data"]] <- as.name("..(@DATA@)..") wildnames <- colnames(data) rm(data) - if(wildcard == 'R' || wildcard == 'G') - wildnames <- unique(c(wildnames, rls(formenv))) - rewild <- wildcard %in% c('r','R') - fullF <- wildcard(fullF, wildnames, re=rewild) + if (wildcard == "R" || wildcard == "G") { + wildnames <- unique(c(wildnames, rls(formenv))) + } + rewild <- wildcard %in% c("r", "R") + fullF <- wildcard(fullF, wildnames, re = rewild) } environment(fullF) <- formenv - mf[['formula']] <- fullF + mf[["formula"]] <- fullF # coerce pdata.frame (from plm) to ensure classes and attributes are preserved in model.frame # http://stackoverflow.com/questions/29724813/how-to-calculate-dynamic-panel-models-with-lfe-package - if(!is.null(mf[['data']])) { - frname <- deparse(mf[['data']]) - assign('..pdata.coerce..', - function(x) { - if(inherits(x,'pdata.frame')) { - if(!requireNamespace('plm')) - stop('Needs package plm to handle pdata.frame ', frname, call.=FALSE) - as.data.frame(x) - } else { - x - } - }, - dataenv) - mf[['data']] <- bquote(..pdata.coerce..(.(mf[['data']]))) + if (!is.null(mf[["data"]])) { + frname <- deparse(mf[["data"]]) + assign( + "..pdata.coerce..", + function(x) { + if (inherits(x, "pdata.frame")) { + if (!requireNamespace("plm")) { + stop("Needs package plm to handle pdata.frame ", frname, call. 
= FALSE) + } + as.data.frame(x) + } else { + x + } + }, + dataenv + ) + mf[["data"]] <- bquote(..pdata.coerce..(.(mf[["data"]]))) } mfcall <- bquote(evalq(.(mf), .(dataenv))) mf <- eval(mfcall) - if(nrow(mf) == 0) stop('0 (non-NA) cases; no valid data') + if (nrow(mf) == 0) stop("0 (non-NA) cases; no valid data") rm(dataenv) naact <- na.action(mf) - if(!is.null(naact) && !is.null(weights)) weights <- weights[-naact] - -# if(is.null(mf$data)) data <- environment(mf[['formula']]) + if (!is.null(naact) && !is.null(weights)) weights <- weights[-naact] + + # if(is.null(mf$data)) data <- environment(mf[['formula']]) # the factor list (rhs=2) needs special attention # it should be made into a model matrix, but treated specially. # It's a sum of terms like f + x:g - fpart <- formula(Form, lhs=0, rhs=2) - if(usewild) fpart <- wildcard(fpart,wildnames,re=rewild) + fpart <- formula(Form, lhs = 0, rhs = 2) + if (usewild) fpart <- wildcard(fpart, wildnames, re = rewild) ftm <- terms(fpart) # we make it into a call like @@ -135,106 +148,140 @@ makematrix <- function(mf, contrasts=NULL, pf=parent.frame(), # which we evaluate in the frame # make a function for ':' env <- new.env(parent = formenv) -# make '*' a function of two arguments to do the interaction. -# assign(':', function(a,b) { -# anam <- deparse(substitute(a)) -# bnam <- deparse(substitute(b)) -# message(' call : ',anam, ':' ,bnam) -# if(is.factor(a) && is.factor(b)) ret <- structure(interaction(a,b,drop=TRUE),xnam=bnam,fnam=anam) -# else if(is.factor(b)) ret <- structure(factor(b),x=a,xnam=anam,fnam=bnam) -# else if(is.factor(a)) ret <- structure(factor(a),x=b,xnam=bnam,fnam=anam) -# else stop('Error in term ',anam,':',bnam,'. Neither ',anam, ' nor ',bnam,' is a factor') -# ret -# }, env) -# fl <- eval(attr(ftm,'variables'), mf, env) - vmat <- attr(ftm,'factors') - - fl <- lapply(attr(ftm,'term.labels'), function(tm) { + # make '*' a function of two arguments to do the interaction. 
+ # assign(':', function(a,b) { + # anam <- deparse(substitute(a)) + # bnam <- deparse(substitute(b)) + # message(' call : ',anam, ':' ,bnam) + # if(is.factor(a) && is.factor(b)) ret <- structure(interaction(a,b,drop=TRUE),xnam=bnam,fnam=anam) + # else if(is.factor(b)) ret <- structure(factor(b),x=a,xnam=anam,fnam=bnam) + # else if(is.factor(a)) ret <- structure(factor(a),x=b,xnam=bnam,fnam=anam) + # else stop('Error in term ',anam,':',bnam,'. Neither ',anam, ' nor ',bnam,' is a factor') + # ret + # }, env) + # fl <- eval(attr(ftm,'variables'), mf, env) + vmat <- attr(ftm, "factors") + + fl <- lapply(attr(ftm, "term.labels"), function(tm) { # function for finding a factor name in the model frame. # It's really just to do mf[[n]], but in case of non-syntactical names like `a+b`, # the index name in mf is "a+b", whereas it's "`a+b'" in the terms object # so we must remove backticks before trying. - gv <- function(n) mf[[sub('^`(.*)`$','\\1',n)]] + gv <- function(n) mf[[sub("^`(.*)`$", "\\1", n)]] f <- gv(tm) # if it's a variable and only occurs in one term, pass it on - if(!is.null(f) && sum(vmat[tm,] > 0) == 1) return(structure(factor(f),fnam=tm)) -# if(!is.null(f)) return(structure(factor(f),fnam=tm)) + if (!is.null(f) && sum(vmat[tm, ] > 0) == 1) { + return(structure(factor(f), fnam = tm)) + } + # if(!is.null(f)) return(structure(factor(f),fnam=tm)) # It's an interaction of some sort, find the variables in the interaction - vars <- attr(ftm,'factors')[,tm] + vars <- attr(ftm, "factors")[, tm] vars <- vars[vars != 0] nm <- names(vars) # find the factors isfac <- sapply(nm, function(n) is.factor(gv(n))) xx <- names(vars)[which(!isfac)] - if(length(xx) > 1) stop('Interaction only allowed for one non-factor') + if (length(xx) > 1) stop("Interaction only allowed for one non-factor") # interact the factors # remove a reference level from the ones which are 1 hasref <- vars == 1 noref <- vars == 2 - + # find the reference, we choose the largest level reffac <- 
which(isfac & hasref) namref <- names(vars[reffac]) reflev <- sapply(namref, function(n) which(names(which.max(table(gv(n)))) %in% levels(gv(n)))) names(reflev) <- namref # make a list with the reference level replaced - if(length(xx) == 0) { -# rflist <- lapply(namref, function(n) {f <- mf[[n]]; levels(f)[[1]] <- NA; f}) - if(length(namref) == 1 && sum(noref&isfac) == 0) - rflist <- list(gv(namref)) - else - rflist <- lapply(namref, function(n) {f <- gv(n); levels(f)[[reflev[[n]]]] <- NA; f}) + if (length(xx) == 0) { + # rflist <- lapply(namref, function(n) {f <- mf[[n]]; levels(f)[[1]] <- NA; f}) + if (length(namref) == 1 && sum(noref & isfac) == 0) { + rflist <- list(gv(namref)) + } else { + ## GRM: Precursor to larger change below (i.e. don't replace individual + ## FEs with NA yet, lest it creates too many reference cases once we + ## interact them). + # rflist <- lapply(namref, function(n) {f <- gv(n); levels(f)[[reflev[[n]]]] <- NA; f}) + rflist <- lapply(namref, function(n) { + f <- gv(n) + f + }) + } names(rflist) <- namref - f <- addNA(do.call(interaction,c(rflist,lapply(names(vars[noref&isfac]), function(n) gv(n)), drop=TRUE)), - ifany=TRUE) - refnam <- paste(sapply(namref, function(n) levels(gv(n))[reflev[n]]), collapse='+') + ## GRM: Changed this next section of code to account for single reference + ## case in the case of interacted FEs + if (length(rflist) == 1) { + f <- addNA( + do.call( + interaction, + c(rflist, lapply( + names(vars[noref & isfac]), + function(n) gv(n) + ), drop = TRUE) + ), + ifany = TRUE + ) + } else { + f <- do.call(interaction, c(rflist, lapply( + names(vars[noref & isfac]), + function(n) gv(n) + ), + drop = TRUE + )) + reflevcomb <- paste(reflev, collapse = ".") + levels(f)[which(f == reflevcomb)] <- NA + f <- addNA(f, ifany = TRUE) + } + refnam <- paste(sapply(namref, function(n) levels(gv(n))[reflev[n]]), collapse = "+") levels(f)[is.na(levels(f))] <- refnam -# structure(f, fnam=names(vars)[1], 
xnam=paste(names(vars)[-1],collapse=':')) - structure(f, fnam=paste(names(vars),collapse=':')) + # structure(f, fnam=names(vars)[1], xnam=paste(names(vars)[-1],collapse=':')) + structure(f, fnam = paste(names(vars), collapse = ":")) } else { - f <- do.call(interaction,c(mf[names(vars)[isfac]], drop=TRUE)) + f <- do.call(interaction, c(mf[names(vars)[isfac]], drop = TRUE)) f <- f[!is.na(f)] - structure(f,fnam=paste(names(vars[isfac]),collapse=':'), x=mf[[xx]], xnam=xx) + structure(f, fnam = paste(names(vars[isfac]), collapse = ":"), x = mf[[xx]], xnam = xx) } # f <- eval(parse(text=tm), mf, env) -# if(is.null(attr(f, 'fnam'))) factor(f) else f + # if(is.null(attr(f, 'fnam'))) factor(f) else f }) - names(fl) <- attr(ftm, 'term.labels') + names(fl) <- attr(ftm, "term.labels") # Name the interactions with the matrix first, then the factor name names(fl) <- sapply(names(fl), function(n) { f <- fl[[n]] - x <- attr(f,'x',exact=TRUE) - if(is.null(x)) return(n) - return(paste(attr(f,'xnam'),attr(f,'fnam'), sep=':')) + x <- attr(f, "x", exact = TRUE) + if (is.null(x)) { + return(n) + } + return(paste(attr(f, "xnam"), attr(f, "fnam"), sep = ":")) }) - hasicpt <- all(sapply(fl,function(f) !is.null(attr(f,'x')))) + hasicpt <- all(sapply(fl, function(f) !is.null(attr(f, "x")))) environment(Form) <- formenv - if(is.null(clustervar)) { - cluform <- terms(formula(Form, lhs=0, rhs=4)) - cluster <- lapply(eval(attr(cluform,'variables'), mf, pf), factor) - names(cluster) <- attr(cluform,'term.labels') - if(length(cluster) == 0) cluster <- NULL + if (is.null(clustervar)) { + cluform <- terms(formula(Form, lhs = 0, rhs = 4)) + cluster <- lapply(eval(attr(cluform, "variables"), mf, pf), factor) + names(cluster) <- attr(cluform, "term.labels") + if (length(cluster) == 0) cluster <- NULL } else { # backwards compatible - if(is.character(clustervar)) clustervar <- as.list(clustervar) - if(!is.list(clustervar)) clustervar <- list(clustervar) + if (is.character(clustervar)) clustervar <- 
as.list(clustervar) + if (!is.list(clustervar)) clustervar <- list(clustervar) cluster <- lapply(clustervar, function(cv) { - if(!is.character(cv)) factor(cv) else factor(eval(as.name(cv),mf,formenv)) + if (!is.character(cv)) factor(cv) else factor(eval(as.name(cv), mf, formenv)) }) } - ivform <- formula(Form,lhs=0, rhs=3, drop=FALSE) + ivform <- formula(Form, lhs = 0, rhs = 3, drop = FALSE) # Pick up IV instruments - if(ivform[[1]] == as.name('~')) ivform <- ivform[[2]] - if(ivform[[1]] == as.name('(')) ivform <- ivform[[2]] - if(!identical(ivform,0)) { + if (ivform[[1]] == as.name("~")) ivform <- ivform[[2]] + if (ivform[[1]] == as.name("(")) ivform <- ivform[[2]] + if (!identical(ivform, 0)) { ivform <- as.Formula(ivform) - if(length(ivform)[2] > 1) stop("Right hand side of IV-spec can't have multiple parts") - inames <- as.character(attr(terms(formula(ivform, lhs=0, rhs=1)), 'variables'))[-1] + if (length(ivform)[2] > 1) stop("Right hand side of IV-spec can't have multiple parts") + inames <- as.character(attr(terms(formula(ivform, lhs = 0, rhs = 1)), "variables"))[-1] environment(ivform) <- formenv } else { ivform <- NULL @@ -242,15 +289,15 @@ makematrix <- function(mf, contrasts=NULL, pf=parent.frame(), } # then the fifth part, the controls - form <- formula(Form, lhs=0, rhs=5, drop=TRUE) - if(!identical(form[[2]],0)) { + form <- formula(Form, lhs = 0, rhs = 5, drop = TRUE) + if (!identical(form[[2]], 0)) { # always parse with intercept, remove it from matrix, so we never project out the intercept - form <- formula(update(form, ~ . +1)) - if(usewild) form <- wildcard(form, wildnames, re=rewild) - ctrlterms <- terms(form, data=mf) - ctrl <- delete.icpt(model.matrix(ctrlterms, data=mf, contrasts.arg=contrasts)) - if(typeof(ctrl) != 'double') storage.mode(ctrl) <- 'double' - if(ncol(ctrl) == 0) { + form <- formula(update(form, ~ . 
+ 1)) + if (usewild) form <- wildcard(form, wildnames, re = rewild) + ctrlterms <- terms(form, data = mf) + ctrl <- delete.icpt(model.matrix(ctrlterms, data = mf, contrasts.arg = contrasts)) + if (typeof(ctrl) != "double") storage.mode(ctrl) <- "double" + if (ncol(ctrl) == 0) { ctrlnames <- ctrl <- NULL } else { ctrlnames <- colnames(ctrl) @@ -261,89 +308,95 @@ makematrix <- function(mf, contrasts=NULL, pf=parent.frame(), } # We have taken Form apart. Keep only exogenous variables - Form <- formula(Form, lhs=NULL, rhs=1, drop=FALSE) + Form <- formula(Form, lhs = NULL, rhs = 1, drop = FALSE) environment(Form) <- formenv -# model.response doesn't work with multiple responses -# y <- model.response(mf,"numeric") + # model.response doesn't work with multiple responses + # y <- model.response(mf,"numeric") - form <- formula(Form, lhs=NULL, rhs=0, drop=FALSE) - if(usewild) form <- wildcard(form, wildnames, re=rewild) - y <- as.matrix(model.part(form, mf, lhs=NULL, rhs=0), rownames.force=FALSE) - if(typeof(y) != 'double') storage.mode(y) <- 'double' + form <- formula(Form, lhs = NULL, rhs = 0, drop = FALSE) + if (usewild) form <- wildcard(form, wildnames, re = rewild) + y <- as.matrix(model.part(form, mf, lhs = NULL, rhs = 0), rownames.force = FALSE) + if (typeof(y) != "double") storage.mode(y) <- "double" form <- formula(Form, lhs = 0, rhs = 1, collapse = c(FALSE, TRUE)) - if(usewild) form <- wildcard(form, wildnames, re=rewild) - xterms <- terms(form, data=mf) - x <- model.matrix(xterms, data=mf, contrasts.arg=contrasts) -# if(length(fl) > 0) { - if(!hasicpt) { - x <- delete.icpt(x) + if (usewild) form <- wildcard(form, wildnames, re = rewild) + xterms <- terms(form, data = mf) + x <- model.matrix(xterms, data = mf, contrasts.arg = contrasts) + # if(length(fl) > 0) { + if (!hasicpt) { + x <- delete.icpt(x) icpt <- FALSE } else { - icpt <- attr(xterms,'intercept') != 0 + icpt <- attr(xterms, "intercept") != 0 } - if(typeof(x) != 'double') storage.mode(x) <- 'double' + if 
(typeof(x) != "double") storage.mode(x) <- "double" setdimnames(x, list(NULL, colnames(x))) - if(!is.null(ivform)) { - form <- formula(ivform, lhs=NULL, rhs=0, drop=FALSE) - if(usewild) form <- wildcard(form, wildnames, re=rewild) - ivy <- as.matrix(model.part(form, mf, lhs=NULL, rhs=0), rownames.force=FALSE) - if(typeof(ivy) != 'double') storage.mode(ivy) <- 'double' + if (!is.null(ivform)) { + form <- formula(ivform, lhs = NULL, rhs = 0, drop = FALSE) + if (usewild) form <- wildcard(form, wildnames, re = rewild) + ivy <- as.matrix(model.part(form, mf, lhs = NULL, rhs = 0), rownames.force = FALSE) + if (typeof(ivy) != "double") storage.mode(ivy) <- "double" form <- formula(ivform, lhs = 0, rhs = 1, collapse = c(FALSE, TRUE)) - if(usewild) form <- wildcard(form,wildnames,re=rewild) - ivxterms <- terms(form, data=mf) + if (usewild) form <- wildcard(form, wildnames, re = rewild) + ivxterms <- terms(form, data = mf) # ivx should never contain an intercept - ivx <- delete.icpt(model.matrix(ivxterms, data=mf, contrasts.arg=contrasts)) - if(typeof(ivx) != 'double') storage.mode(ivx) <- 'double' + ivx <- delete.icpt(model.matrix(ivxterms, data = mf, contrasts.arg = contrasts)) + if (typeof(ivx) != "double") storage.mode(ivx) <- "double" setdimnames(ivx, list(NULL, colnames(ivx))) } else { ivy <- NULL ivx <- NULL } - mm <- list(x=x, y=y, ivx=ivx, ivy=ivy, ctrl=ctrl, fl=fl, weights=weights) - mm$extra <- list(icpt=icpt,xterms=xterms,cluster=cluster,Form=Form,ivform=ivform, - inames=inames,naact=naact,model=mf,mfcall=mfcall) - if(onlymm) return(mm) + mm <- list(x = x, y = y, ivx = ivx, ivy = ivy, ctrl = ctrl, fl = fl, weights = weights) + mm$extra <- list( + icpt = icpt, xterms = xterms, cluster = cluster, Form = Form, ivform = ivform, + inames = inames, naact = naact, model = mf, mfcall = mfcall + ) + if (onlymm) { + return(mm) + } mmdemean(mm) } mmdemean <- function(mm) { # orig is necessary to compute the r.residuals, i.e. 
residuals without dummies # it's used in getfe() and btrap, but is of no use if we have ctrl variables - if(is.null(mm$weights)) - TSS <- apply(mm$y,2,var)*(nrow(mm$y)-1) - else - TSS <- apply(mm$y, 2, function(yy) sum( mm$weights^2*(yy-sum(mm$weights^2*yy/sum(mm$weights^2)))^2)) + if (is.null(mm$weights)) { + TSS <- apply(mm$y, 2, var) * (nrow(mm$y) - 1) + } else { + TSS <- apply(mm$y, 2, function(yy) sum(mm$weights^2 * (yy - sum(mm$weights^2 * yy / sum(mm$weights^2)))^2)) + } names(TSS) <- colnames(mm$y) - if(length(mm$fl) != 0) { - result <- demeanlist(list(y=mm$y, x=mm$x, ivy=mm$ivy, ivx=mm$ivx, ctrl=mm$ctrl), - fl=mm$fl,weights=mm$weights) - if(is.null(mm$ctrl)) result$orig <- list(y=mm$y, x=mm$x, ivy=mm$ivy, ivx=mm$ivx) + if (length(mm$fl) != 0) { + result <- demeanlist(list(y = mm$y, x = mm$x, ivy = mm$ivy, ivx = mm$ivx, ctrl = mm$ctrl), + fl = mm$fl, weights = mm$weights + ) + if (is.null(mm$ctrl)) result$orig <- list(y = mm$y, x = mm$x, ivy = mm$ivy, ivx = mm$ivx) } else { - result <- list(y=mm$y, x=mm$x, ivy=mm$ivy, ivx=mm$ivx, ctrl=mm$ctrl) + result <- list(y = mm$y, x = mm$x, ivy = mm$ivy, ivx = mm$ivx, ctrl = mm$ctrl) } - if(!is.null(result$ctrl)) { + if (!is.null(result$ctrl)) { # pure control variables to project out # do ols, use the residuals as new variables - y <- cbind(result$y,result$x,result$ivy,result$ivx) + y <- cbind(result$y, result$x, result$ivy, result$ivx) x <- result$ctrl result$ctrl <- NULL -# fit <- .lm.fit(x,y) -# my own is much faster for large datasets - fit <- newols(list(y=y,x=x,weights=mm$weights), nostats=TRUE) + # fit <- .lm.fit(x,y) + # my own is much faster for large datasets + fit <- newols(list(y = y, x = x, weights = mm$weights), nostats = TRUE) resid <- as.matrix(fit$residuals) setdimnames(resid, list(NULL, colnames(y))) numctrl <- fit$rank - rm(fit,x,y) + rm(fit, x, y) - result$y <- resid[,colnames(result$y), drop=FALSE] - if(!is.null(result$x)) result$x <- resid[,colnames(result$x), drop=FALSE] - 
if(!is.null(result$ivy)) result$ivy <- resid[,colnames(result$ivy), drop=FALSE] - if(!is.null(result$ivx)) result$ivx <- resid[,colnames(result$ivx), drop=FALSE] + result$y <- resid[, colnames(result$y), drop = FALSE] + if (!is.null(result$x)) result$x <- resid[, colnames(result$x), drop = FALSE] + if (!is.null(result$ivy)) result$ivy <- resid[, colnames(result$ivy), drop = FALSE] + if (!is.null(result$ivx)) result$ivx <- resid[, colnames(result$ivx), drop = FALSE] rm(resid) } else { numctrl <- 0L @@ -375,58 +428,59 @@ is_nested <- function(f1, f2) { f2 <- as.factor(f2) stopifnot(length(f1) == length(f2)) k <- length(levels(f1)) - sm <- as(methods::new("ngTMatrix", i = as.integer(f2) - 1L, j = as.integer(f1) - - 1L, Dim = c(length(levels(f2)), k)), "CsparseMatrix") + sm <- as(methods::new("ngTMatrix", i = as.integer(f2) - 1L, j = as.integer(f1) - + 1L, Dim = c(length(levels(f2)), k)), "CsparseMatrix") all(sm@p[2:(k + 1L)] - sm@p[1:k] <= 1L) } -newols <- function(mm, stage1=NULL, pf=parent.frame(), nostats=FALSE, exactDOF=FALSE, - kappa=NULL, onlyse=FALSE, psdef=FALSE) { - - if(!is.null(mm$orig)) - orig <- mm$orig - else - orig <- mm +newols <- function(mm, stage1 = NULL, pf = parent.frame(), nostats = FALSE, exactDOF = FALSE, + kappa = NULL, onlyse = FALSE, psdef = FALSE) { + if (!is.null(mm$orig)) { + orig <- mm$orig + } else { + orig <- mm + } weights <- mm$weights - numctrl <- if(is.null(mm$numctrl)) 0 else mm$numctrl - hasicpt <- if(is.null(mm$hasicpt)) FALSE else mm$hasicpt + numctrl <- if (is.null(mm$numctrl)) 0 else mm$numctrl + hasicpt <- if (is.null(mm$hasicpt)) FALSE else mm$hasicpt cfactor <- compfactor(mm$fl) - if(is.numeric(exactDOF)) { + if (is.numeric(exactDOF)) { df <- exactDOF totvar <- nrow(mm$y) - df } else { # numrefs is also used later - numrefs <- nrefs(mm$fl, cfactor, exactDOF) - totvar <- totalpvar(mm$fl)-numrefs + numctrl - df <- nrow(mm$y)-totvar + numrefs <- nrefs(mm$fl, cfactor, exactDOF) + totvar <- totalpvar(mm$fl) - numrefs + numctrl 
+ df <- nrow(mm$y) - totvar } # special case for no covariates - if(is.null(mm$x) || ncol(mm$x) == 0) { - - z <- list(N=nrow(mm$x), - p=totvar,Pp=0, - na.action=mm$na.action, contrasts=mm$contrasts, - df=df, - nostats=FALSE, - numctrl=numctrl, - hasicpt=hasicpt, - lhs=colnames(mm$y), - call=match.call()) - if(!onlyse) { + if (is.null(mm$x) || ncol(mm$x) == 0) { + z <- list( + N = nrow(mm$x), + p = totvar, Pp = 0, + na.action = mm$na.action, contrasts = mm$contrasts, + df = df, + nostats = FALSE, + numctrl = numctrl, + hasicpt = hasicpt, + lhs = colnames(mm$y), + call = match.call() + ) + if (!onlyse) { z$r.residuals <- orig$y z$fe <- mm$fl z$cfactor <- cfactor - z$fitted.values <- orig$y[,colnames(mm$y),drop=FALSE] - mm$y + z$fitted.values <- orig$y[, colnames(mm$y), drop = FALSE] - mm$y z$df.residual <- z$df - z$residuals=mm$y - z$clustervar=mm$cluster + z$residuals <- mm$y + z$clustervar <- mm$cluster } - class(z) <- 'felm' + class(z) <- "felm" return(z) } @@ -438,39 +492,39 @@ newols <- function(mm, stage1=NULL, pf=parent.frame(), nostats=FALSE, exactDOF=F # Indeed, the estimator is (X' (I-kappa M_Z)X)^{-1} X' (I-kappa M_Z) y) # Now, note that I - kappa M_Z = P_Z + (1-kappa)M_Z. 
So it is the # fitted values plus a fraction of the residuals - + # (see http://www.tandfonline.com/doi/pdf/10.1080/07350015.2014.978175 p 11) - if(!is.null(weights)) iweights <- 1/weights - if(!is.null(weights)) { - .Call(C_scalecols,mm$x,weights) - .Call(C_scalecols,mm$y,weights) + if (!is.null(weights)) iweights <- 1 / weights + if (!is.null(weights)) { + .Call(C_scalecols, mm$x, weights) + .Call(C_scalecols, mm$y, weights) } - if(!is.null(kappa)) { - cp <- crossprod(mm$x) - kappa*crossprod(mm$noinst) - b <- crossprod(mm$x,mm$y) - kappa * crossprod(mm$noinst, mm$y) + if (!is.null(kappa)) { + cp <- crossprod(mm$x) - kappa * crossprod(mm$noinst) + b <- crossprod(mm$x, mm$y) - kappa * crossprod(mm$noinst, mm$y) } else { cp <- crossprod(mm$x) - b <- crossprod(mm$x,mm$y) + b <- crossprod(mm$x, mm$y) } ch <- cholx(cp) - badvars <- attr(ch,'badvars') + badvars <- attr(ch, "badvars") z <- list() - class(z) <- 'felm' - if(is.null(badvars)) { - beta <- backsolve(ch,backsolve(ch,b,transpose=TRUE)) - if(!nostats) z$inv <- chol2inv(ch) + class(z) <- "felm" + if (is.null(badvars)) { + beta <- backsolve(ch, backsolve(ch, b, transpose = TRUE)) + if (!nostats) z$inv <- chol2inv(ch) } else { beta <- matrix(NaN, nrow(cp), ncol(b)) - beta[-badvars,] <- backsolve(ch,backsolve(ch,b[-badvars,], transpose=TRUE)) - if(!nostats) { - z$inv <- matrix(NA,nrow(cp),ncol(cp)) - z$inv[-badvars,-badvars] <- chol2inv(ch) + beta[-badvars, ] <- backsolve(ch, backsolve(ch, b[-badvars, ], transpose = TRUE)) + if (!nostats) { + z$inv <- matrix(NA, nrow(cp), ncol(cp)) + z$inv[-badvars, -badvars] <- chol2inv(ch) } } - if(!nostats && !is.null(kappa)) { + if (!nostats && !is.null(kappa)) { # In k-class with k!=0 and k!=1, the covariance matrix isn't simply the # inverse of cp. 
This is so because # hatbeta - beta = (X' K X)^{1} X' K' epsilon @@ -478,31 +532,32 @@ newols <- function(mm, stage1=NULL, pf=parent.frame(), nostats=FALSE, exactDOF=F # var(hatbeta-beta) = sigma^2 (X' K X)^{-1} X' K' K X (X' K X)^{-1} # and since K isn't a projection, we do not have K'K = K, so # we can't cancel out one of the (X' K X)^{-1} -# kinv <- z$inv %*% crossprod(mm$x - kappa*mm$noinst) %*% z$inv - kinv <- .Call(C_sandwich,1.0,z$inv,crossprod(mm$x - kappa*mm$noinst)) + # kinv <- z$inv %*% crossprod(mm$x - kappa*mm$noinst) %*% z$inv + kinv <- .Call(C_sandwich, 1.0, z$inv, crossprod(mm$x - kappa * mm$noinst)) } rm(ch, b, cp) -# rownames(beta) <- colnames(orig$x) + # rownames(beta) <- colnames(orig$x) rownames(beta) <- colnames(mm$x) - if(!is.null(weights)) { - .Call(C_scalecols,mm$x,iweights) - .Call(C_scalecols,mm$y,iweights) + if (!is.null(weights)) { + .Call(C_scalecols, mm$x, iweights) + .Call(C_scalecols, mm$y, iweights) } -# z$lhs <- colnames(beta) <- colnames(orig$y) + # z$lhs <- colnames(beta) <- colnames(orig$y) z$lhs <- colnames(beta) <- colnames(mm$y) z$hasicpt <- hasicpt z$TSS <- mm$TSS z$kappa <- kappa - if(is.null(weights)) - z$P.TSS <- apply(mm$y,2,var)*(nrow(mm$y)-1) - else - z$P.TSS <- apply(mm$y, 2, function(yy) sum( weights^2*(yy-sum(weights^2*yy/sum(weights^2)))^2)) + if (is.null(weights)) { + z$P.TSS <- apply(mm$y, 2, var) * (nrow(mm$y) - 1) + } else { + z$P.TSS <- apply(mm$y, 2, function(yy) sum(weights^2 * (yy - sum(weights^2 * yy / sum(weights^2)))^2)) + } names(z$P.TSS) <- colnames(mm$y) - if(!onlyse) z$weights <- weights + if (!onlyse) z$weights <- weights z$numctrl <- numctrl z$coefficients <- z$beta <- beta @@ -514,24 +569,24 @@ newols <- function(mm, stage1=NULL, pf=parent.frame(), nostats=FALSE, exactDOF=F zfit <- mm$x %*% nabeta zresid <- mm$y - zfit - + z$residuals <- zresid - if(!onlyse) { - z$response <- orig$y[,colnames(mm$y),drop=FALSE] + if (!onlyse) { + z$response <- orig$y[, colnames(mm$y), drop = FALSE] 
z$c.fitted.values <- zfit - z$fitted.values <- z$response-z$residuals -# z$fitted.values <- zfit + z$fitted.values <- z$response - z$residuals + # z$fitted.values <- zfit z$cfactor <- compfactor(mm$fl) z$fe <- mm$fl } z$contrasts <- mm$contrasts - if(!onlyse) { - if(length(mm$fl) != 0) { -# message('dims:');print(dim(orig$y)); print(dim(orig$x)); print(dim(nabeta)) - if(is.null(kappa)) z$r.residuals <- orig$y - orig$x %*% nabeta -# if(!is.null(weights)) .Call(C_scalecols,z$r.residuals,weights) + if (!onlyse) { + if (length(mm$fl) != 0) { + # message('dims:');print(dim(orig$y)); print(dim(orig$x)); print(dim(nabeta)) + if (is.null(kappa)) z$r.residuals <- orig$y - orig$x %*% nabeta + # if(!is.null(weights)) .Call(C_scalecols,z$r.residuals,weights) } else { z$r.residuals <- z$residuals } @@ -544,13 +599,13 @@ newols <- function(mm, stage1=NULL, pf=parent.frame(), nostats=FALSE, exactDOF=F # hmm, what about the r.residuals? We modify them as well. They are # used in kaczmarz(). - if(!is.null(stage1)) { + if (!is.null(stage1)) { # we need the centred response in condfstat() fitnam <- makefitnames(stage1$lhs) - ivresid <- stage1$residuals %*% nabeta[fitnam,,drop=FALSE] - - z$residuals <- z$residuals - ivresid - if(!onlyse) { + ivresid <- stage1$residuals %*% nabeta[fitnam, , drop = FALSE] + + z$residuals <- z$residuals - ivresid + if (!onlyse) { z$c.response <- mm$y z$iv.residuals <- zresid z$r.iv.residuals <- z$r.residuals @@ -559,11 +614,11 @@ newols <- function(mm, stage1=NULL, pf=parent.frame(), nostats=FALSE, exactDOF=F z$fitted.values <- z$response - z$residuals } } - + z$terms <- mm$terms totlev <- totalpvar(mm$fl) - if(is.numeric(exactDOF)) { + if (is.numeric(exactDOF)) { z$df <- exactDOF numdum <- z$N - z$p - z$df z$numrefs <- totlev - numdum @@ -576,225 +631,224 @@ newols <- function(mm, stage1=NULL, pf=parent.frame(), nostats=FALSE, exactDOF=F z$rank <- z$N - z$df z$exactDOF <- exactDOF -# should we subtract 1 for an intercept? 
-# a similar adjustment is done in summary.felm when computing rdf - z$p <- z$p + numdum #- 1 + # should we subtract 1 for an intercept? + # a similar adjustment is done in summary.felm when computing rdf + z$p <- z$p + numdum #- 1 z$xp <- z$p z$na.action <- mm$na.action - class(z) <- 'felm' + class(z) <- "felm" cluster <- mm$cluster - if(!onlyse) z$clustervar <- cluster + if (!onlyse) z$clustervar <- cluster z$stage1 <- stage1 - if(nostats) { + if (nostats) { z$nostats <- TRUE return(z) } z$nostats <- FALSE -# then we go about creating the covariance matrices and tests -# if there is a single lhs, they are just stored as matrices etc -# in z. If there are multiple lhs, these quantities are inserted -# in a list z$STATS indexed by z$lhs -# indexed by the name of the lhs + # then we go about creating the covariance matrices and tests + # if there is a single lhs, they are just stored as matrices etc + # in z. If there are multiple lhs, these quantities are inserted + # in a list z$STATS indexed by z$lhs + # indexed by the name of the lhs vcvnames <- list(rownames(beta), rownames(beta)) Ncoef <- nrow(beta) singlelhs <- length(z$lhs) == 1 - # preallocate STATS - if(!singlelhs) z$STATS <- list() + # preallocate STATS + if (!singlelhs) z$STATS <- list() z$STATS <- list() - if(is.null(kappa)) { + if (is.null(kappa)) { vinv <- z$inv } else { vinv <- kinv } inv <- nazero(vinv) - + xz <- mm$x - if(!is.null(kappa)) xz <- xz - kappa*mm$noinst - for(lhs in z$lhs) { - res <- z$residuals[,lhs] + if (!is.null(kappa)) xz <- xz - kappa * mm$noinst + for (lhs in z$lhs) { + res <- z$residuals[, lhs] - if(!is.null(weights)) res <- weights*res -# when multiple lhs, vcvfactor is a vector -# we need a list of vcvs in this case + if (!is.null(weights)) res <- weights * res + # when multiple lhs, vcvfactor is a vector + # we need a list of vcvs in this case - vcv <- sum(res**2)/z$df * vinv + vcv <- sum(res**2) / z$df * vinv setdimnames(vcv, vcvnames) z$STATS[[lhs]]$vcv <- vcv - 
if(singlelhs) z$vcv <- vcv - - # We should make the robust covariance matrix too. - # it's inv * sum (X_i' u_i u_i' X_i) * inv - # where u_i are the (full) residuals (Wooldridge, 10.5.4 (10.59)) - # i.e. inv * sum(u_i^2 X_i' X_i) * inv - # for large datasets the sum is probably best computed by a series of scaled - # rank k updates, i.e. the dsyrk blas routine, we make an R-version of it. - # need to check this computation, the SE's are slightly numerically different from Stata's. - # it seems stata does not do the small-sample adjustment - dfadj <- z$N/z$df - - # Now, here's an optimzation for very large xz. If we use the wcrossprod and ccrossprod - # functions, we can't get rid of xz, we end up with a copy of it which blows away memory. - # we need to scale xz with the residuals in xz, but we don't want to expand res to a full matrix, - # and even get a copy in the result. - # Thus we modify it in place with a .Call. The scaled variant is also used in the cluster computation. - - rscale <- ifelse(res==0,1e-40,res) # make sure nothing is zero - if(!is.null(weights)) rscale <- rscale*weights + if (singlelhs) z$vcv <- vcv + + # We should make the robust covariance matrix too. + # it's inv * sum (X_i' u_i u_i' X_i) * inv + # where u_i are the (full) residuals (Wooldridge, 10.5.4 (10.59)) + # i.e. inv * sum(u_i^2 X_i' X_i) * inv + # for large datasets the sum is probably best computed by a series of scaled + # rank k updates, i.e. the dsyrk blas routine, we make an R-version of it. + # need to check this computation, the SE's are slightly numerically different from Stata's. + # it seems stata does not do the small-sample adjustment + dfadj <- z$N / z$df + + # Now, here's an optimzation for very large xz. If we use the wcrossprod and ccrossprod + # functions, we can't get rid of xz, we end up with a copy of it which blows away memory. 
+ # we need to scale xz with the residuals in xz, but we don't want to expand res to a full matrix, + # and even get a copy in the result. + # Thus we modify it in place with a .Call. The scaled variant is also used in the cluster computation. + + rscale <- ifelse(res == 0, 1e-40, res) # make sure nothing is zero + if (!is.null(weights)) rscale <- rscale * weights # This one scales the columns without copying # For xz, remember to scale it back, because we scale directly into # mm$x .Call(C_scalecols, xz, rscale) # compute inv %*% crossprod(xz) %*% inv - # via a blas dsyrk. Save some memory + # via a blas dsyrk. Save some memory meat <- matrix(0, Ncoef, Ncoef) - .Call(C_dsyrk,0.0,meat,dfadj,xz) - rvcv <- .Call(C_sandwich,1.0,inv,meat) + .Call(C_dsyrk, 0.0, meat, dfadj, xz) + rvcv <- .Call(C_sandwich, 1.0, inv, meat) setdimnames(rvcv, vcvnames) z$STATS[[lhs]]$robustvcv <- rvcv - if(singlelhs) z$robustvcv <- rvcv + if (singlelhs) z$robustvcv <- rvcv rm(meat, rvcv) - # then the clustered covariance matrix - if(!is.null(cluster)) { - method <- attr(cluster,'method') - if(is.null(method)) method <- 'cgm' - dfadj <- (z$N-1)/z$df - + # then the clustered covariance matrix + if (!is.null(cluster)) { + method <- attr(cluster, "method") + if (is.null(method)) method <- "cgm" + dfadj <- (z$N - 1) / z$df + ## GRM: Extra adjustments to the DoF are needed in cases where clusters ## are nested within any of the FEs. See Cameron and Miller (2015, pp. 14-15): ## http://cameron.econ.ucdavis.edu/research/Cameron_Miller_JHR_2015_February.pdf#page=14 - fe_cl_grid <- expand.grid(fe_k=seq_along(z$fe), cl_g=seq_along(cluster)) - any_nested <- + fe_cl_grid <- expand.grid(fe_k = seq_along(z$fe), cl_g = seq_along(cluster)) + any_nested <- vapply( - seq_len(nrow(fe_cl_grid)), + seq_len(nrow(fe_cl_grid)), function(n) { fe_k <- fe_cl_grid$fe_k[n] cl_g <- fe_cl_grid$cl_g[n] is_nested(z$fe[[fe_k]], cluster[[cl_g]]) - }, + }, FUN.VALUE = logical(1) ) ## Find the minimum cluster dimension. 
Will be used below in the case of - ## multiway clustering, but only if the FEs are nested within a cluster, + ## multiway clustering, but only if the FEs are nested within a cluster, ## or 'cgm2' (or 'reghdfe') is specified for the `cmethod` argument. - min_clust <- + min_clust <- min(vapply( seq_along(cluster), - function(i) nlevels(cluster[[i]]), + function(i) nlevels(cluster[[i]]), FUN.VALUE = integer(1) )) if (any(any_nested)) { - # Will use the simple correction proposed by Gormley and Matsa (RFS, 2014). + # Will use the simple correction proposed by Gormley and Matsa (RFS, 2014). # See: https://www.kellogg.northwestern.edu/faculty/matsa/htm/fe.htm - dfadj <- dfadj * z$df / (z$df + totvar - 1) - # In addition to the above, the nested clusters case requires that + dfadj <- dfadj * z$df / (z$df + totvar - 1) + # In addition to the above, the nested clusters case requires that # regressor p-values are calculated using no. of clusters - 1 degrees # of freedom; similar to "df2" in `waldtest()`. This is straight forward - # when there is only a single cluster variable. In the case of multiway - # clustering, however, we'll conservatively take "no. of clusters" to - # mean the cluster variable with the smallest dimension. If nothing else, - # this should ensure consistency with comparable implementations in + # when there is only a single cluster variable. In the case of multiway + # clustering, however, we'll conservatively take "no. of clusters" to + # mean the cluster variable with the smallest dimension. If nothing else, + # this should ensure consistency with comparable implementations in # Stata (via reghdfe) and Julia (via FixedEffectModels.jl). 
See also: # https://github.com/matthieugomez/FixedEffectModels.jl/pull/50 - z$df <- min(min_clust-1, z$df) + z$df <- min(min_clust - 1, z$df) z$df.residual <- z$df } ## End of nested cluster adjustment - + d <- length(cluster) - if(method %in% c('cgm', 'cgm2', 'reghdfe')) { - meat <- matrix(0,Ncoef,Ncoef) - for(i in 1:(2^d-1)) { + if (method %in% c("cgm", "cgm2", "reghdfe")) { + meat <- matrix(0, Ncoef, Ncoef) + for (i in 1:(2^d - 1)) { # Find out which ones to interact iac <- as.logical(intToBits(i))[1:d] # odd number is positive, even is negative - sgn <- 2*(sum(iac) %% 2) - 1 + sgn <- 2 * (sum(iac) %% 2) - 1 # interact the factors - ia <- factor(do.call(paste,c(cluster[iac],sep='\004'))) - # CGM2011 (sec 2.3) describe two possible small-sample adjustments + ia <- factor(do.call(paste, c(cluster[iac], sep = "\004"))) + # CGM2011 (sec 2.3) describe two possible small-sample adjustments # using the number of clusters in each cluster category. Note that - # these two approaches should only diverge in the case of multiway + # these two approaches should only diverge in the case of multiway # clustering. - if(method == 'cgm') { + if (method == "cgm") { ## Option 1 (used by GCM2011 in their paper and also our default here) - adj <- sgn*dfadj*nlevels(ia)/(nlevels(ia)-1) + adj <- sgn * dfadj * nlevels(ia) / (nlevels(ia) - 1) } else { ## i.e. 
if method %in% c('cgm2','reghdfe') ## Option 2 (used by Stata's reghdfe among others, so we'll give it that alias for convenience) - adj <- sgn*dfadj*min_clust/(min_clust-1) + adj <- sgn * dfadj * min_clust / (min_clust - 1) ## Will also need to adjust DoF used to calculate p-vals and CIs - z$df <- min(min_clust-1, z$df) + z$df <- min(min_clust - 1, z$df) z$df.residual <- z$df } - .Call(C_dsyrk,1.0,meat,adj,Crowsum(xz,ia)) + .Call(C_dsyrk, 1.0, meat, adj, crowsum(xz, ia)) } - cvcv <- .Call(C_sandwich,1.0,inv,meat) - if(psdef && d > 1) { + cvcv <- .Call(C_sandwich, 1.0, inv, meat) + if (psdef && d > 1) { ev <- eigen(cvcv) badev <- Im(ev$values) != 0 | Re(ev$values) < 0 - if(any(badev)) { - warning('Negative eigenvalues set to zero in multiway clustered variance matrix. See felm(...,psdef=FALSE)') + if (any(badev)) { + warning("Negative eigenvalues set to zero in multiway clustered variance matrix. See felm(...,psdef=FALSE)") ev$values[badev] <- 0 cvcv <- Re(ev$vectors %*% diag(ev$values) %*% t(ev$vectors)) } -# if(any(Im(ev$values) != 0 | Re(ev$values) < 0)) { -# warning('Negative eigenvalues set to zero in multiway clustered variance matrix. See felm(...,psdef=FALSE)') -# cvcv <- ev$vectors %*% diag(pmax(ev$values,0)) %*% t(ev$vectors) -# } + # if(any(Im(ev$values) != 0 | Re(ev$values) < 0)) { + # warning('Negative eigenvalues set to zero in multiway clustered variance matrix. 
See felm(...,psdef=FALSE)') + # cvcv <- ev$vectors %*% diag(pmax(ev$values,0)) %*% t(ev$vectors) + # } rm(ev) } setdimnames(cvcv, vcvnames) z$STATS[[lhs]]$clustervcv <- cvcv - if(singlelhs) z$clustervcv <- cvcv - rm(meat,cvcv) - - } else if(method == 'gaure') { - # .Call(C_scalecols, xz, 1/rscale) - meat <- matrix(0,nrow(z$vcv),ncol(z$vcv)) + if (singlelhs) z$clustervcv <- cvcv + rm(meat, cvcv) + } else if (method == "gaure") { + # .Call(C_scalecols, xz, 1/rscale) + meat <- matrix(0, nrow(z$vcv), ncol(z$vcv)) # scale the columns according to group size - sc <- apply(sapply(cluster, function(f) table(f)[f]),1,mean) - xc <- demeanlist(xz,cluster, means=TRUE) - .Call(C_scalecols, xc, sqrt(sc)) + sc <- apply(sapply(cluster, function(f) table(f)[f]), 1, mean) + xc <- demeanlist(xz, cluster, means = TRUE) + .Call(C_scalecols, xc, sqrt(sc)) adj <- dfadj -# adj <- adj*prod(sapply(cluster, function(f) nlevels(f)/(nlevels(f)-1))) + # adj <- adj*prod(sapply(cluster, function(f) nlevels(f)/(nlevels(f)-1))) .Call(C_dsyrk, 1, meat, adj, xc) - cvcv <- .Call(C_sandwich,1.0,inv,meat) + cvcv <- .Call(C_sandwich, 1.0, inv, meat) setdimnames(cvcv, vcvnames) z$STATS[[lhs]]$clustervcv <- cvcv - if(singlelhs) z$clustervcv <- cvcv - rm(meat,cvcv) -# .Call(C_scalecols, xz, rscale) + if (singlelhs) z$clustervcv <- cvcv + rm(meat, cvcv) + # .Call(C_scalecols, xz, rscale) } else { - stop('unknown multi way cluster algorithm:',method) + stop("unknown multi way cluster algorithm:", method) } - - + + z$STATS[[lhs]]$cse <- sqrt(diag(z$STATS[[lhs]]$clustervcv)) - z$STATS[[lhs]]$ctval <- z$coefficients[,lhs]/z$STATS[[lhs]]$cse - z$STATS[[lhs]]$cpval <- 2*pt(abs(z$STATS[[lhs]]$ctval),z$df,lower.tail=FALSE) - - if(singlelhs) { + z$STATS[[lhs]]$ctval <- z$coefficients[, lhs] / z$STATS[[lhs]]$cse + z$STATS[[lhs]]$cpval <- 2 * pt(abs(z$STATS[[lhs]]$ctval), z$df, lower.tail = FALSE) + + if (singlelhs) { z$cse <- z$STATS[[lhs]]$cse z$ctval <- z$STATS[[lhs]]$ctval z$cpval <- z$STATS[[lhs]]$cpval - } + } } 
z$STATS[[lhs]]$se <- sqrt(diag(z$STATS[[lhs]]$vcv)) - z$STATS[[lhs]]$tval <- z$coefficients[,lhs]/z$STATS[[lhs]]$se - z$STATS[[lhs]]$pval <- 2*pt(abs(z$STATS[[lhs]]$tval),z$df,lower.tail=FALSE) - + z$STATS[[lhs]]$tval <- z$coefficients[, lhs] / z$STATS[[lhs]]$se + z$STATS[[lhs]]$pval <- 2 * pt(abs(z$STATS[[lhs]]$tval), z$df, lower.tail = FALSE) + z$STATS[[lhs]]$rse <- sqrt(diag(z$STATS[[lhs]]$robustvcv)) - z$STATS[[lhs]]$rtval <- z$coefficients[,lhs]/z$STATS[[lhs]]$rse - z$STATS[[lhs]]$rpval <- 2*pt(abs(z$STATS[[lhs]]$rtval),z$df,lower.tail=FALSE) + z$STATS[[lhs]]$rtval <- z$coefficients[, lhs] / z$STATS[[lhs]]$rse + z$STATS[[lhs]]$rpval <- 2 * pt(abs(z$STATS[[lhs]]$rtval), z$df, lower.tail = FALSE) - if(singlelhs) { + if (singlelhs) { z$se <- z$STATS[[lhs]]$se z$tval <- z$STATS[[lhs]]$tval z$pval <- z$STATS[[lhs]]$pval @@ -805,9 +859,9 @@ newols <- function(mm, stage1=NULL, pf=parent.frame(), nostats=FALSE, exactDOF=F } # reset this for next lhs - .Call(C_scalecols, xz, 1/rscale) + .Call(C_scalecols, xz, 1 / rscale) } - if(onlyse) z$residuals <- NULL + if (onlyse) z$residuals <- NULL z } @@ -818,588 +872,632 @@ newols <- function(mm, stage1=NULL, pf=parent.frame(), nostats=FALSE, exactDOF=F #' Fit a linear model with multiple group fixed effects -#' +#' #' 'felm' is used to fit linear models with multiple group fixed effects, #' similarly to lm. It uses the Method of Alternating projections to sweep out #' multiple group effects from the normal equations before estimating the #' remaining coefficients with OLS. -#' +#' #' This function is intended for use with large datasets with multiple group #' effects of large cardinality. If dummy-encoding the group effects results #' in a manageable number of coefficients, you are probably better off by using -#' \code{\link{lm}}. -#' +#' [lm()]. +#' #' The formula specification is a response variable followed by a four part #' formula. 
The first part consists of ordinary covariates, the second part #' consists of factors to be projected out. The third part is an #' IV-specification. The fourth part is a cluster specification for the -#' standard errors. I.e. something like \code{y ~ x1 + x2 | f1 + f2 | (Q|W ~ -#' x3+x4) | clu1 + clu2} where \code{y} is the response, \code{x1,x2} are -#' ordinary covariates, \code{f1,f2} are factors to be projected out, \code{Q} -#' and \code{W} are covariates which are instrumented by \code{x3} and -#' \code{x4}, and \code{clu1,clu2} are factors to be used for computing cluster +#' standard errors. I.e. something like `y ~ x1 + x2 | f1 + f2 | (Q|W ~ +#' x3+x4) | clu1 + clu2` where `y` is the response, `x1,x2` are +#' ordinary covariates, `f1,f2` are factors to be projected out, `Q` +#' and `W` are covariates which are instrumented by `x3` and +#' `x4`, and `clu1,clu2` are factors to be used for computing cluster #' robust standard errors. Parts that are not used should be specified as -#' \code{0}, except if it's at the end of the formula, where they can be -#' omitted. The parentheses are needed in the third part since \code{|} has -#' higher precedence than \code{~}. Multiple left hand sides like \code{y|w|x ~ -#' x1 + x2 |f1+f2|...} are allowed. -#' -#' Interactions between a covariate \code{x} and a factor \code{f} can be -#' projected out with the syntax \code{x:f}. The terms in the second and +#' `0`, except if it's at the end of the formula, where they can be +#' omitted. The parentheses are needed in the third part since `|` has +#' higher precedence than `~`. Multiple left hand sides like `y|w|x ~ +#' x1 + x2 |f1+f2|...` are allowed. +#' +#' Interactions between a covariate `x` and a factor `f` can be +#' projected out with the syntax `x:f`. The terms in the second and #' fourth parts are not treated as ordinary formulas, in particular it is not -#' possible with things like \code{y ~ x1 | x*f}, rather one would specify -#' \code{y ~ x1 + x | x:f + f}. 
Note that \code{f:x} also works, since R's +#' possible with things like `y ~ x1 | x*f`, rather one would specify +#' `y ~ x1 + x | x:f + f`. Note that `f:x` also works, since R's #' parser does not keep the order. This means that in interactions, the factor -#' \emph{must} be a factor, whereas a non-interacted factor will be coerced to -#' a factor. I.e. in \code{y ~ x1 | x:f1 + f2}, the \code{f1} must be a factor, -#' whereas it will work as expected if \code{f2} is an integer vector. -#' -#' In older versions of \pkg{lfe} the syntax was \code{felm(y ~ x1 + x2 + G(f1) -#' + G(f2), iv=list(Q ~ x3+x4, W ~ x3+x4), clustervar=c('clu1','clu2'))}. This -#' syntax still works, but yields a warning. Users are \emph{strongly} +#' *must* be a factor, whereas a non-interacted factor will be coerced to +#' a factor. I.e. in `y ~ x1 | x:f1 + f2`, the `f1` must be a factor, +#' whereas it will work as expected if `f2` is an integer vector. +#' +#' In older versions of \pkg{lfe} the syntax was `felm(y ~ x1 + x2 + G(f1) +#' + G(f2), iv=list(Q ~ x3+x4, W ~ x3+x4), clustervar=c('clu1','clu2'))`. This +#' syntax still works, but yields a warning. Users are *strongly* #' encouraged to change to the new multipart formula syntax. The old syntax #' will be removed at a later time. -#' +#' #' The standard errors are adjusted for the reduced degrees of freedom coming #' from the dummies which are implicitly present. (An exception occurs in the -#' case of clustered standard errors and, specifically, where clusters are -#' nested within fixed effects; see -#' \href{https://github.com/sgaure/lfe/issues/1#issuecomment-528643802}{here}.) +#' case of clustered standard errors and, specifically, where clusters are +#' nested within fixed effects; see +#' [here](https://github.com/sgaure/lfe/issues/1#issuecomment-528643802).) #' In the case of two factors, #' the exact number of implicit dummies is easy to compute. 
If there are more #' factors, the number of dummies is estimated by assuming there's one #' reference-level for each factor, this may be a slight over-estimation, -#' leading to slightly too large standard errors. Setting \code{exactDOF='rM'} -#' computes the exact degrees of freedom with \code{rankMatrix()} in package +#' leading to slightly too large standard errors. Setting `exactDOF='rM'` +#' computes the exact degrees of freedom with `rankMatrix()` in package #' \pkg{Matrix}. -#' +#' #' For the iv-part of the formula, it is only necessary to include the #' instruments on the right hand side. The other explanatory covariates, from -#' the first and second part of \code{formula}, are added automatically in the +#' the first and second part of `formula`, are added automatically in the #' first stage regression. See the examples. -#' -#' The \code{contrasts} argument is similar to the one in \code{lm()}, it is +#' +#' The `contrasts` argument is similar to the one in `lm()`, it is #' used for factors in the first part of the formula. The factors in the second -#' part are analyzed as part of a possible subsequent \code{getfe()} call. -#' -#' The \code{cmethod} argument may affect the clustered covariance matrix (and -#' thus regressor standard errors), either directly or via adjustments to a +#' part are analyzed as part of a possible subsequent `getfe()` call. +#' +#' The `cmethod` argument may affect the clustered covariance matrix (and +#' thus regressor standard errors), either directly or via adjustments to a #' degrees of freedom scaling factor. In particular, Cameron, Gelbach and Miller -#' (CGM2011, sec. 2.3) describe two possible small cluster corrections that are +#' (CGM2011, sec. 2.3) describe two possible small cluster corrections that are #' relevant in the case of multiway clustering. \itemize{ -#' \item The first approach adjusts each component of the cluster-robust -#' variance estimator (CRVE) by its own \eqn{c_i} adjustment factor. 
For -#' example, the first component (with \eqn{G} clusters) is adjusted by -#' \eqn{c_1=\frac{G}{G-1}\frac{N-1}{N-K}}{c_1 = G/(G-1)*(N-1)/(N-K)}, +#' \item The first approach adjusts each component of the cluster-robust +#' variance estimator (CRVE) by its own \eqn{c_i} adjustment factor. For +#' example, the first component (with \eqn{G} clusters) is adjusted by +#' \eqn{c_1=\frac{G}{G-1}\frac{N-1}{N-K}}{c_1 = G/(G-1)*(N-1)/(N-K)}, #' the second component (with \eqn{H} clusters) is adjusted #' by \eqn{c_2=\frac{H}{H-1}\frac{N-1}{N-K}}{c_2 = H/(H-1)*(N-1)/(N-K)}, etc. #' \item The second approach applies the same adjustment to all CRVE components: #' \eqn{c=\frac{J}{J-1}\frac{N-1}{N-K}}{c = J/(J-1)*(N-1)/(N-K)}, where -#' \eqn{J=\min(G,H)}{J=min(G,H)} in the case of two-way clustering, for example. +#' \eqn{J=\min(G,H)}{J=min(G,H)} in the case of two-way clustering, for example. #' } -#' Any differences resulting from these two approaches are likely to be minor, -#' and they will obviously yield exactly the same results when there is only one -#' cluster dimension. Still, CGM2011 adopt the former approach in their own -#' paper and simulations. This is also the default method that \code{felm} uses -#' (i.e. \code{cmethod = 'cgm'}). However, the latter approach has since been -#' adopted by several other packages that allow for robust inference with +#' Any differences resulting from these two approaches are likely to be minor, +#' and they will obviously yield exactly the same results when there is only one +#' cluster dimension. Still, CGM2011 adopt the former approach in their own +#' paper and simulations. This is also the default method that `felm` uses +#' (i.e. `cmethod = 'cgm'`). However, the latter approach has since been +#' adopted by several other packages that allow for robust inference with #' multiway clustering. 
This includes the popular Stata package -#' \href{http://scorreia.com/software/reghdfe/}{reghdfe}, as well as the -#' \href{https://github.com/matthieugomez/FixedEffectModels.jl}{FixedEffectModels.jl} +#' [reghdfe](http://scorreia.com/software/reghdfe/), as well as the +#' [FixedEffectModels.jl](https://github.com/matthieugomez/FixedEffectModels.jl) #' implementation in Julia. To match results from these packages exactly, use -#' \code{cmethod = 'cgm2'} (or its alias, \code{cmethod = 'reghdfe'}). It is -#' possible that some residual differences may still remain; see discussion -#' \href{https://github.com/sgaure/lfe/issues/1#issuecomment-530561314}{here}. -#' -#' The old syntax with a single part formula with the \code{G()} syntax for the +#' `cmethod = 'cgm2'` (or its alias, `cmethod = 'reghdfe'`). It is +#' possible that some residual differences may still remain; see discussion +#' [here](https://github.com/sgaure/lfe/issues/1#issuecomment-530561314). +#' +#' The old syntax with a single part formula with the `G()` syntax for the #' factors to transform away is still supported, as well as the -#' \code{clustervar} and \code{iv} arguments, but users are encouraged to move -#' to the new multi part formulas as described here. The \code{clustervar} and -#' \code{iv} arguments have been moved to the \code{...} argument list. They +#' `clustervar` and `iv` arguments, but users are encouraged to move +#' to the new multi part formulas as described here. The `clustervar` and +#' `iv` arguments have been moved to the `...` argument list. They #' will be removed in some future update. -#' +#' #' @param formula an object of class '"formula"' (or one that can be coerced to #' that class): a symbolic description of the model to be fitted. Similarly to #' 'lm'. See Details. #' @param data a data frame containing the variables of the model. #' @param exactDOF logical. 
If more than two factors, the degrees of freedom #' used to scale the covariance matrix (and the standard errors) is normally -#' estimated. Setting \code{exactDOF=TRUE} causes \code{felm} to attempt to +#' estimated. Setting `exactDOF=TRUE` causes `felm` to attempt to #' compute it, but this may fail if there are too many levels in the factors. -#' \code{exactDOF='rM'} will use the exact method in -#' \code{Matrix::rankMatrix()}, but this is slower. If neither of these methods -#' works, it is possible to specify \code{exactDOF='mc'}, which utilizes a +#' `exactDOF='rM'` will use the exact method in +#' `Matrix::rankMatrix()`, but this is slower. If neither of these methods +#' works, it is possible to specify `exactDOF='mc'`, which utilizes a #' Monte-Carlo method to estimate the expectation E(x' P x) = tr(P), the trace #' of a certain projection, a method which may be more accurate than the #' default guess. -#' +#' #' If the degrees of freedom for some reason are known, they can be specified -#' like \code{exactDOF=342772}. +#' like `exactDOF=342772`. #' @param subset an optional vector specifying a subset of observations to be #' used in the fitting process. #' @param na.action a function which indicates what should happen when the data -#' contain \code{NA}s. The default is set by the \code{na.action} setting of -#' \code{options}, and is \code{na.fail} if that is unset. The 'factory-fresh' -#' default is \code{na.omit}. Another possible value is \code{NULL}, no -#' action. \code{na.exclude} is currently not supported. -#' @param contrasts an optional list. See the \code{contrasts.arg} of -#' \code{model.matrix.default}. +#' contain `NA`s. The default is set by the `na.action` setting of +#' `options`, and is `na.fail` if that is unset. The 'factory-fresh' +#' default is `na.omit`. Another possible value is `NULL`, no +#' action. `na.exclude` is currently not supported. +#' @param contrasts an optional list. 
See the `contrasts.arg` of +#' `model.matrix.default`. #' @param weights an optional vector of weights to be used in the fitting #' process. Should be 'NULL' or a numeric vector. If non-NULL, weighted least -#' squares is used with weights \code{weights} (that is, minimizing -#' \code{sum(w*e^2)}); otherwise ordinary least squares is used. +#' squares is used with weights `weights` (that is, minimizing +#' `sum(w*e^2)`); otherwise ordinary least squares is used. #' @param ... other arguments. \itemize{ -#' -#' \item \code{cmethod} character. Which clustering method to use. Known -#' arguments are \code{'cgm'} (the default), \code{'cgm2'} (or \code{'reghdfe'}, -#' its alias). These alternate methods will generally +#' +#' \item `cmethod` character. Which clustering method to use. Known +#' arguments are `'cgm'` (the default), `'cgm2'` (or `'reghdfe'`, +#' its alias). These alternate methods will generally #' yield equivalent results, except in the case of multiway clustering with few -#' clusters along at least one dimension. -#' -#' \item \code{keepX} logical. To include a copy of the expanded data matrix in -#' the return value, as needed by \code{\link{bccorr}} and \code{\link{fevcov}} +#' clusters along at least one dimension. +#' +#' \item `keepX` logical. To include a copy of the expanded data matrix in +#' the return value, as needed by [bccorr()] and [fevcov()] #' for proper limited mobility bias correction. -#' -#' \item \code{keepCX} logical. Keep a copy of the centred expanded data matrix -#' in the return value. As list elements \code{cX} for the explanatory -#' variables, and \code{cY} for the outcome. #' -#' \item \code{keepModel} logical. Keep a copy of the model frame. -#' -#' \item \code{nostats} logical. Don't include covariance matrices in the +#' \item `keepCX` logical. Keep a copy of the centred expanded data matrix +#' in the return value. As list elements `cX` for the explanatory +#' variables, and `cY` for the outcome. 
+#' +#' \item `keepModel` logical. Keep a copy of the model frame. +#' +#' \item `nostats` logical. Don't include covariance matrices in the #' output, just the estimated coefficients and various descriptive information. -#' For IV, \code{nostats} can be a logical vector of length 2, with the last -#' value being used for the 1st stages. \item \code{psdef} logical. In case of +#' For IV, `nostats` can be a logical vector of length 2, with the last +#' value being used for the 1st stages. \item `psdef` logical. In case of #' multiway clustering, the method of Cameron, Gelbach and Miller may yield a #' non-definite variance matrix. Ordinarily this is forced to be semidefinite -#' by setting negative eigenvalues to zero. Setting \code{psdef=FALSE} will +#' by setting negative eigenvalues to zero. Setting `psdef=FALSE` will #' switch off this adjustment. Since the variance estimator is asymptotically #' correct, this should only have an effect when the clustering factors have #' very few levels. -#' -#' \item \code{kclass} character. For use with instrumental variables. Use a -#' k-class estimator rather than 2SLS/IV. Currently, the values \code{'nagar', -#' 'b2sls', 'mb2sls', 'liml'} are accepted, where the names are from +#' +#' \item `kclass` character. For use with instrumental variables. Use a +#' k-class estimator rather than 2SLS/IV. Currently, the values `'nagar', +#' 'b2sls', 'mb2sls', 'liml'` are accepted, where the names are from #' \cite{Kolesar et al (2014)}, as well as a numeric value for the 'k' in -#' k-class. With \code{kclass='liml'}, \code{felm} also accepts the argument -#' \code{fuller=}, for using a Fuller adjustment of the +#' k-class. With `kclass='liml'`, `felm` also accepts the argument +#' `fuller=`, for using a Fuller adjustment of the #' liml-estimator. 
-#' -#' \item \code{Nboot, bootexpr, bootcluster} Since \code{felm} has quite a bit +#' +#' \item `Nboot, bootexpr, bootcluster` Since `felm` has quite a bit #' of overhead in the creation of the model matrix, if one wants confidence #' intervals for some function of the estimated parameters, it is possible to -#' bootstrap internally in \code{felm}. That is, the model matrix is resampled -#' \code{Nboot} times and estimated, and the \code{bootexpr} is evaluated -#' inside an \code{sapply}. The estimated coefficients and the left hand -#' side(s) are available by name. Any right hand side variable \code{x} is -#' available by the name \code{var.x}. The \code{"felm"}-object for each -#' estimation is available as \code{est}. If a \code{bootcluster} is specified -#' as a factor, entire levels are resampled. \code{bootcluster} can also be a +#' bootstrap internally in `felm`. That is, the model matrix is resampled +#' `Nboot` times and estimated, and the `bootexpr` is evaluated +#' inside an `sapply`. The estimated coefficients and the left hand +#' side(s) are available by name. Any right hand side variable `x` is +#' available by the name `var.x`. The `"felm"`-object for each +#' estimation is available as `est`. If a `bootcluster` is specified +#' as a factor, entire levels are resampled. `bootcluster` can also be a #' function with no arguments, it should return a vector of integers, the rows #' to use in the sample. It can also be the string 'model', in which case the -#' cluster is taken from the model. \code{bootexpr} should be an expression, -#' e.g. like \code{quote(x/x2 * abs(x3)/mean(y))}. It could be wise to specify -#' \code{nostats=TRUE} when bootstrapping, unless the covariance matrices are +#' cluster is taken from the model. `bootexpr` should be an expression, +#' e.g. like `quote(x/x2 * abs(x3)/mean(y))`. It could be wise to specify +#' `nostats=TRUE` when bootstrapping, unless the covariance matrices are #' needed in the bootstrap. 
If you need the covariance matrices in the full #' estimate, but not in the bootstrap, you can specify it in an attribute -#' \code{"boot"} as \code{nostats=structure(FALSE, boot=TRUE)}. -#' -#' \item \code{iv, clustervar} deprecated. These arguments will be removed at +#' `"boot"` as `nostats=structure(FALSE, boot=TRUE)`. +#' +#' \item `iv, clustervar` deprecated. These arguments will be removed at #' a later time, but are still supported in this field. Users are -#' \emph{STRONGLY} encouraged to use multipart formulas instead. In +#' *STRONGLY* encouraged to use multipart formulas instead. In #' particular, not all functionality is supported with the deprecated syntax; #' iv-estimations actually run a lot faster if multipart formulas are used, due #' to new algorithms which I didn't bother to shoehorn in place for the #' deprecated syntax. -#' +#' #' } -#' @return \code{felm} returns an object of \code{class} \code{"felm"}. It is -#' quite similar to an \code{"lm"} object, but not entirely compatible. -#' -#' The generic \code{summary}-method will yield a summary which may be -#' \code{print}'ed. The object has some resemblance to an \code{'lm'} object, -#' and some postprocessing methods designed for \code{lm} may happen to work. +#' @return `felm` returns an object of `class` `"felm"`. It is +#' quite similar to an `"lm"` object, but not entirely compatible. +#' +#' The generic `summary`-method will yield a summary which may be +#' `print`'ed. The object has some resemblance to an `'lm'` object, +#' and some postprocessing methods designed for `lm` may happen to work. #' It may however be necessary to coerce the object to succeed with this. -#' -#' The \code{"felm"} object is a list containing the following fields: -#' +#' +#' The `"felm"` object is a list containing the following fields: +#' #' \item{coefficients}{a numerical vector. The estimated coefficients.} #' \item{N}{an integer. The number of observations} \item{p}{an integer. 
The #' total number of coefficients, including those projected out.} #' \item{response}{a numerical vector. The response vector.} #' \item{fitted.values}{a numerical vector. The fitted values.} -#' +#' #' \item{residuals}{a numerical vector. The residuals of the full system, with #' dummies. For IV-estimations, this is the residuals when the original #' endogenous variables are used, not their predictions from the 1st stage.} -#' +#' #' \item{r.residuals}{a numerical vector. Reduced residuals, i.e. the residuals -#' resulting from predicting \emph{without} the dummies.} -#' +#' resulting from predicting *without* the dummies.} +#' #' \item{iv.residuals}{numerical vector. When using instrumental variables, #' residuals from 2. stage, i.e. when predicting with the predicted endogenous #' variables from the 1st stage.} -#' -#' \item{weights}{numeric. The square root of the argument \code{weights}.} -#' +#' +#' \item{weights}{numeric. The square root of the argument `weights`.} +#' #' \item{cfactor}{factor of length N. The factor describing the connected #' components of the two first terms in the second part of the model formula.} -#' +#' #' \item{vcv}{a matrix. The variance-covariance matrix.} -#' +#' #' \item{fe}{list of factors. A list of the terms in the second part of the #' model formula.} -#' -#' \item{stage1}{The '\code{felm}' objects for the IV 1st stage, if used. The +#' +#' \item{stage1}{The '`felm`' objects for the IV 1st stage, if used. The #' 1st stage has multiple left hand sides if there are more than one #' instrumented variable.} -#' +#' #' \item{iv1fstat}{list of numerical vectors. For IV 1st stage, F-value for #' excluded instruments, the number of parameters in restricted model and in #' the unrestricted model.} -#' +#' #' \item{X}{matrix. The expanded data matrix, i.e. from the first part of the #' formula. To save memory with large datasets, it is only included if -#' \code{felm(keepX=TRUE)} is specified. 
Must be included if -#' \code{\link{bccorr}} or \code{\link{fevcov}} is to be used for correcting +#' `felm(keepX=TRUE)` is specified. Must be included if +#' [bccorr()] or [fevcov()] is to be used for correcting #' limited mobility bias. } -#' +#' #' \item{cX, cY}{matrix. The centred expanded data matrix. Only included if -#' \code{felm(keepCX=TRUE)}. } -#' -#' \item{boot}{The result of a \code{replicate} applied to the \code{bootexpr} +#' `felm(keepCX=TRUE)`. } +#' +#' \item{boot}{The result of a `replicate` applied to the `bootexpr` #' (if used).} -#' +#' #' @note -#' Side effect: If \code{data} is an object of class \code{"pdata.frame"} (from +#' Side effect: If `data` is an object of class `"pdata.frame"` (from #' the \pkg{plm} package), the \pkg{plm} namespace is loaded if available, and -#' \code{data} is coerced to a \code{"data.frame"} with \code{as.data.frame} +#' `data` is coerced to a `"data.frame"` with `as.data.frame` #' which dispatches to a \pkg{plm} method. This ensures that transformations -#' like \code{diff} and \code{lag} from \pkg{plm} works as expected, but it -#' also incurs an additional copy of the \code{data}, and the \pkg{plm} -#' namespace remains loaded after \code{felm} returns. When working with -#' \code{"pdata.frame"}s, this is what is usually wanted anyway. -#' +#' like `diff` and `lag` from \pkg{plm} works as expected, but it +#' also incurs an additional copy of the `data`, and the \pkg{plm} +#' namespace remains loaded after `felm` returns. When working with +#' `"pdata.frame"`s, this is what is usually wanted anyway. +#' #' For technical reasons, when running IV-estimations, the data frame supplied -#' in the \code{data} argument to \code{felm}, should \emph{not} contain -#' variables with names ending in \code{'(fit)'}. Variables with such names -#' are used internally by \code{felm}, and may then accidentally be looked up +#' in the `data` argument to `felm`, should *not* contain +#' variables with names ending in `'(fit)'`. 
Variables with such names +#' are used internally by `felm`, and may then accidentally be looked up #' in the data frame instead of the local environment where they are defined. -#' @seealso \code{\link{getfe}} \code{\link{summary.felm}} -#' \code{\link{condfstat}} \code{\link{waldtest}} +#' @seealso [getfe()] [summary.felm()] +#' [condfstat()] [waldtest()] #' @references Cameron, A.C., J.B. Gelbach and D.L. Miller (2011) \cite{Robust #' inference with multiway clustering}, Journal of Business & Economic #' Statistics 29 (2011), no. 2, 238--249. -#' \url{http://dx.doi.org/10.1198/jbes.2010.07136} -#' +#' \doi{10.1198/jbes.2010.07136} +#' #' Kolesar, M., R. Chetty, J. Friedman, E. Glaeser, and G.W. Imbens (2014) #' \cite{Identification and Inference with Many Invalid Instruments}, Journal #' of Business & Economic Statistics (to appear). -#' \url{http://dx.doi.org/10.1080/07350015.2014.978175} +#' \doi{10.1080/07350015.2014.978175} #' @examples -#' -#' oldopts <- options(lfe.threads=1) -#' +#' +#' ## Default is to use all cores. We'll limit it to 2 for this example. 
+#' oldopts <- options("lfe.threads") +#' options(lfe.threads = 2) +#' #' ## Simulate data -#' -#' # Covariates -#' x <- rnorm(1000) -#' x2 <- rnorm(length(x)) -#' # Individuals and firms -#' id <- factor(sample(20,length(x),replace=TRUE)) -#' firm <- factor(sample(13,length(x),replace=TRUE)) -#' # Effects for them -#' id.eff <- rnorm(nlevels(id)) -#' firm.eff <- rnorm(nlevels(firm)) +#' set.seed(42) +#' n <- 1e3 +#' +#' d <- data.frame( +#' # Covariates +#' x1 = rnorm(n), +#' x2 = rnorm(n), +#' # Individuals and firms +#' id = factor(sample(20, n, replace = TRUE)), +#' firm = factor(sample(13, n, replace = TRUE)), +#' # Noise +#' u = rnorm(n) +#' ) +#' +#' # Effects for individuals and firms +#' id.eff <- rnorm(nlevels(d$id)) +#' firm.eff <- rnorm(nlevels(d$firm)) +#' #' # Left hand side -#' u <- rnorm(length(x)) -#' y <- x + 0.5*x2 + id.eff[id] + firm.eff[firm] + u -#' +#' d$y <- d$x1 + 0.5 * d$x2 + id.eff[d$id] + firm.eff[d$firm] + d$u +#' #' ## Estimate the model and print the results -#' est <- felm(y ~ x + x2 | id + firm) +#' est <- felm(y ~ x1 + x2 | id + firm, data = d) #' summary(est) -#' -#' \dontrun{ #' # Compare with lm -#' summary(lm(y ~ x + x2 + id + firm-1))} -#' +#' summary(lm(y ~ x1 + x2 + id + firm - 1, data = d)) +#' #' ## Example with 'reverse causation' (IV regression) -#' +#' #' # Q and W are instrumented by x3 and the factor x4. 
-#' x3 <- rnorm(length(x)) -#' x4 <- sample(12,length(x),replace=TRUE) -#' Q <- 0.3*x3 + x + 0.2*x2 + id.eff[id] + 0.3*log(x4) - 0.3*y + rnorm(length(x),sd=0.3) -#' W <- 0.7*x3 - 2*x + 0.1*x2 - 0.7*id.eff[id] + 0.8*cos(x4) - 0.2*y+ rnorm(length(x),sd=0.6) +#' d$x3 <- rnorm(n) +#' d$x4 <- sample(12, n, replace = TRUE) +#' d$Q <- 0.3 * d$x3 + d$x1 + 0.2 * d$x2 + id.eff[d$id] + 0.3 * log(d$x4) - 0.3 * d$y + +#' rnorm(n, sd = 0.3) +#' d$W <- 0.7 * d$x3 - 2 * d$x1 + 0.1 * d$x2 - 0.7 * id.eff[d$id] + 0.8 * cos(d$x4) - +#' 0.2 * d$y + rnorm(n, sd = 0.6) +#' #' # Add them to the outcome variable -#' y <- y + Q + W -#' +#' d$y <- d$y + d$Q + d$W +#' #' ## Estimate the IV model and report robust SEs -#' ivest <- felm(y ~ x + x2 | id + firm | (Q|W ~ x3 + factor(x4))) -#' summary(ivest, robust=TRUE) +#' ivest <- felm(y ~ x1 + x2 | id + firm | (Q | W ~ x3 + factor(x4)), data = d) +#' summary(ivest, robust = TRUE) #' condfstat(ivest) -#' -#' \dontrun{ #' # Compare with the not instrumented fit: -#' summary(felm(y ~ x + x2 + Q + W | id + firm))} -#' +#' summary(felm(y ~ x1 + x2 + Q + W | id + firm, data = d)) +#' #' ## Example with multiway clustering -#' +#' #' # Create a large cluster group (500 clusters) and a small one (20 clusters) -#' cl1 <- factor(sample(rep(1:500, length.out=length(x)))) -#' cl2 <- factor(sample(rep(1:20, length.out=length(x)))) -#' # Function for adding clustered noise to our outcome variable +#' d$cl1 <- factor(sample(rep(1:500, length.out = n))) +#' d$cl2 <- factor(sample(rep(1:20, length.out = n))) +#' # Function for adding clustered noise to our outcome variable #' cl_noise <- function(cl) { -#' obs_per_cluster <- length(x)/nlevels(cl) -#' unlist(replicate(nlevels(cl), rnorm(obs_per_cluster, mean=rnorm(1), sd=runif(1)), simplify=FALSE)) +#' obs_per_cluster <- n / nlevels(cl) +#' unlist(replicate(nlevels(cl), +#' rnorm(obs_per_cluster, mean = rnorm(1), sd = runif(1)), +#' simplify = FALSE +#' )) #' } +#' #' # New outcome variable -#' y_cl <- x + 
0.5*x2 + id.eff[id] + firm.eff[firm] + cl_noise(cl1) + cl_noise(cl2) -#' +#' d$y_cl <- d$x1 + 0.5 * d$x2 + id.eff[d$id] + firm.eff[d$firm] + +#' cl_noise(d$cl1) + cl_noise(d$cl2) +#' #' ## Estimate and print the model with cluster-robust SEs (default) -#' est_cl <- felm(y_cl ~ x + x2 | id + firm | 0 | cl1 + cl2) +#' est_cl <- felm(y_cl ~ x1 + x2 | id + firm | 0 | cl1 + cl2, data = d) #' summary(est_cl) -#' -#' \dontrun{ +#' #' # Print ordinary standard errors: #' summary(est_cl, robust = FALSE) #' # Match cluster-robust SEs from Stata's reghdfe package: -#' summary(felm(y_cl ~ x + x2 | id + firm | 0 | cl1 + cl2, cmethod="reghdfe"))} -#' +#' summary(felm(y_cl ~ x1 + x2 | id + firm | 0 | cl1 + cl2, +#' data = d, +#' cmethod = "reghdfe" +#' )) +#' +#' ## Restore default options #' options(oldopts) -#' +#' #' @export felm -felm <- function(formula, data, exactDOF=FALSE, subset, na.action, - contrasts=NULL,weights=NULL,...) { - - knownargs <- c('iv', 'clustervar', 'cmethod', 'keepX', 'nostats', - 'wildcard', 'kclass', 'fuller', 'keepCX', 'Nboot', - 'bootexpr', 'bootcluster','onlyse','psdef','keepModel') - cmethod <- 'cgm' +felm <- function(formula, data, exactDOF = FALSE, subset, na.action, + contrasts = NULL, weights = NULL, ...) { + knownargs <- c( + "iv", "clustervar", "cmethod", "keepX", "nostats", + "wildcard", "kclass", "fuller", "keepCX", "Nboot", + "bootexpr", "bootcluster", "onlyse", "psdef", "keepModel" + ) + cmethod <- "cgm" iv <- NULL clustervar <- NULL nostats <- FALSE - wildcard <- 'n' + wildcard <- "n" kclass <- NULL fuller <- 0 Nboot <- 0 onlyse <- FALSE bootexpr <- NULL bootcluster <- NULL - deprec <- c('iv', 'clustervar') + deprec <- c("iv", "clustervar") psdef <- TRUE keepX <- FALSE keepCX <- FALSE keepModel <- FALSE - + mf <- match.call(expand.dots = TRUE) # Currently there shouldn't be any ... 
arguments # check that the list is empty -# if(length(mf[['...']]) > 0) stop('unknown argument ',mf['...']) - + # if(length(mf[['...']]) > 0) stop('unknown argument ',mf['...']) + args <- list(...) - ka <- knownargs[pmatch(names(args),knownargs, duplicates.ok=FALSE)] + ka <- knownargs[pmatch(names(args), knownargs, duplicates.ok = FALSE)] names(args)[!is.na(ka)] <- ka[!is.na(ka)] dpr <- deprec[match(ka, deprec)] - if(any(!is.na(dpr))) { + if (any(!is.na(dpr))) { bad <- dpr[which(!is.na(dpr))] - .Deprecated('',msg=paste('Argument(s)',paste(bad,collapse=','), 'are deprecated and will be removed, use multipart formula instead')) -# warning('Argument(s) ',paste(bad,collapse=','), ' are deprecated and will be removed, use multipart formula instead') + .Deprecated("", msg = paste("Argument(s)", paste(bad, collapse = ","), "are deprecated and will be removed, use multipart formula instead")) + # warning('Argument(s) ',paste(bad,collapse=','), ' are deprecated and will be removed, use multipart formula instead') } env <- environment() - lapply(intersect(knownargs,ka), function(arg) assign(arg,args[[arg]], pos=env)) + lapply(intersect(knownargs, ka), function(arg) assign(arg, args[[arg]], pos = env)) - if(!(cmethod %in% c('cgm','cgm2','reghdfe','gaure'))) stop('Unknown cmethod: ',cmethod) + if (!(cmethod %in% c("cgm", "cgm2", "reghdfe", "gaure"))) stop("Unknown cmethod: ", cmethod) # also implement a check for unknown arguments unk <- setdiff(names(args), knownargs) - if(length(unk) > 0) stop('unknown arguments ',paste(unk, collapse=' ')) + if (length(unk) > 0) stop("unknown arguments ", paste(unk, collapse = " ")) # backwards compatible - Gtm <- terms(formula(as.Formula(formula), rhs=1), specials='G') - if(!is.null(attr(Gtm,'specials')$G) || !is.null(iv)) { - mf <- match.call(expand.dots=TRUE) + Gtm <- terms(formula(as.Formula(formula), rhs = 1), specials = "G") + if (!is.null(attr(Gtm, "specials")$G) || !is.null(iv)) { + mf <- match.call(expand.dots = TRUE) mf[[1L]] <- 
quote(..oldfelm) return(eval.parent(mf)) } - pint <- getOption('lfe.pint') + pint <- getOption("lfe.pint") start <- last <- Sys.time() - mm <- makematrix(mf, contrasts, pf=parent.frame(), clustervar, wildcard=wildcard) - if(!is.null(mm$cluster)) attr(mm$cluster,'method') <- cmethod + mm <- makematrix(mf, contrasts, pf = parent.frame(), clustervar, wildcard = wildcard) + if (!is.null(mm$cluster)) attr(mm$cluster, "method") <- cmethod now <- Sys.time() - if(now > last + pint) {last <- now; message(date(), ' finished centering model matrix')} - z <- felm.mm(mm,nostats,exactDOF,keepX,keepCX,keepModel,kclass,fuller,onlyse,psdef=psdef) + if (now > last + pint) { + last <- now + message(date(), " finished centering model matrix") + } + z <- felm.mm(mm, nostats, exactDOF, keepX, keepCX, keepModel, kclass, fuller, onlyse, psdef = psdef) z$call <- match.call() z$formula <- formula z$keepX <- keepX z$keepCX <- keepCX - if(Nboot > 0) { + if (Nboot > 0) { now <- Sys.time() - if(now > last + pint) {last <- now; message(date(), ' finished estimate, starting bootstrap')} - - mm <- makematrix(mf, contrasts, pf=parent.frame(), clustervar, wildcard=wildcard, - onlymm=TRUE) - if(is.null(bootexpr)) bootexpr <- quote(beta) - if(is.null(bootcluster)) - csample <- function() sort(sample(nrow(mm$x), replace=TRUE)) - else if(is.function(bootcluster)) - csample <- bootcluster - else if(is.factor(bootcluster)) - csample <- function() resample(bootcluster,na.action=mm$extra$naact) - else if(identical(bootcluster, 'model')) - csample <- function() resample(mm$extra$cluster) - else - stop('bootcluster must be either a factor or a function') + if (now > last + pint) { + last <- now + message(date(), " finished estimate, starting bootstrap") + } + + mm <- makematrix(mf, contrasts, + pf = parent.frame(), clustervar, wildcard = wildcard, + onlymm = TRUE + ) + if (is.null(bootexpr)) bootexpr <- quote(beta) + if (is.null(bootcluster)) { + csample <- function() sort(sample(nrow(mm$x), replace = TRUE)) 
+ } else if (is.function(bootcluster)) { + csample <- bootcluster + } else if (is.factor(bootcluster)) { + csample <- function() resample(bootcluster, na.action = mm$extra$naact) + } else if (identical(bootcluster, "model")) { + csample <- function() resample(mm$extra$cluster) + } else { + stop("bootcluster must be either a factor or a function") + } bootstat <- nostats - if(!is.null(attr(nostats,'boot'))) bootstat <- attr(nostats,'boot') + if (!is.null(attr(nostats, "boot"))) bootstat <- attr(nostats, "boot") iii <- 0 bootfun <- function() { - now <<- Sys.time() - iii <<- iii+1 - if(now > last + pint) { - last <<- now; message(date(), ' done boot iter ',iii) + assign("now", Sys.time(), inherits = TRUE) # replaces: now <<- Sys.time() + iii_tmp <- iii + 1 # replaces: iii <<- iii+1 + assign("iii", iii_tmp, inherits = TRUE) + if (now > last + pint) { + assign("last", now, inherits = TRUE) # replaces last <<- now + message(date(), " done boot iter ", iii) } bootenv <- new.env() # we delay assign to avoid unnecessary estimating and copying - if(FALSE) {olsmms <- mms <- est <- bootx <- booty <- bootivy <- NULL} #avoid warning about no visible binding - delayedAssign('s',csample(),eval.env=bootenv,assign.env=bootenv) - delayedAssign('bootx',mm$x[s,,drop=FALSE],eval.env=bootenv,assign.env=bootenv) - delayedAssign('booty',mm$y[s,,drop=FALSE],eval.env=bootenv,assign.env=bootenv) - delayedAssign('bootivy',mm$ivy[s,,drop=FALSE],eval.env=bootenv,assign.env=bootenv) - - delayedAssign('mms', - { - mm1 <- list() - mm1$extra <- mm$extra - mm1$extra$cluster <- lapply(mm$extra$cluster,function(f) f[s]) - mm1$extra$naact <- NULL - mm1$x <- bootx - mm1$y <- booty - if(!is.null(mm$ivx)) mm1$ivx <- mm$ivx[s,,drop=FALSE] - if(!is.null(mm$ivy)) mm1$ivy <- bootivy - if(!is.null(mm$ctrl)) mm1$ctrl <- mm$ctrl[s,,drop=FALSE] - if(!is.null(mm$fl)) mm1$fl <- lapply(mm$fl,function(f) factor(f[s])) - if(!is.null(weights)) mm1$weights <- mm$weights[s] - mmdemean(mm1) - }, - eval.env=bootenv, - 
assign.env=bootenv) - delayedAssign('est', - felm.mm(mms,bootstat,exactDOF,keepX,keepCX,keepModel,kclass,fuller,onlyse,psdef=psdef), - eval.env=bootenv, - assign.env=bootenv) - delayedAssign('beta', coef(est), assign.env=bootenv, eval.env=bootenv) - for(n in colnames(mm$x)) { - do.call(delayedAssign,list(n,bquote(est$coefficients[.(n),]), - eval.env=bootenv, - assign.env=bootenv)) - do.call(delayedAssign,list(paste0('var.',n),bquote(bootx[,.(n)]), - eval.env=bootenv, - assign.env=bootenv)) - + if (FALSE) { + olsmms <- mms <- est <- bootx <- booty <- bootivy <- NULL + } # avoid warning about no visible binding + delayedAssign("s", csample(), eval.env = bootenv, assign.env = bootenv) + delayedAssign("bootx", mm$x[s, , drop = FALSE], eval.env = bootenv, assign.env = bootenv) + delayedAssign("booty", mm$y[s, , drop = FALSE], eval.env = bootenv, assign.env = bootenv) + delayedAssign("bootivy", mm$ivy[s, , drop = FALSE], eval.env = bootenv, assign.env = bootenv) + + delayedAssign("mms", + { + mm1 <- list() + mm1$extra <- mm$extra + mm1$extra$cluster <- lapply(mm$extra$cluster, function(f) f[s]) + mm1$extra$naact <- NULL + mm1$x <- bootx + mm1$y <- booty + if (!is.null(mm$ivx)) mm1$ivx <- mm$ivx[s, , drop = FALSE] + if (!is.null(mm$ivy)) mm1$ivy <- bootivy + if (!is.null(mm$ctrl)) mm1$ctrl <- mm$ctrl[s, , drop = FALSE] + if (!is.null(mm$fl)) mm1$fl <- lapply(mm$fl, function(f) factor(f[s])) + if (!is.null(weights)) mm1$weights <- mm$weights[s] + mmdemean(mm1) + }, + eval.env = bootenv, + assign.env = bootenv + ) + delayedAssign("est", + felm.mm(mms, bootstat, exactDOF, keepX, keepCX, keepModel, kclass, fuller, onlyse, psdef = psdef), + eval.env = bootenv, + assign.env = bootenv + ) + delayedAssign("beta", coef(est), assign.env = bootenv, eval.env = bootenv) + for (n in colnames(mm$x)) { + do.call(delayedAssign, list(n, bquote(est$coefficients[.(n), ]), + eval.env = bootenv, + assign.env = bootenv + )) + do.call(delayedAssign, list(paste0("var.", n), bquote(bootx[, .(n)]), 
+ eval.env = bootenv, + assign.env = bootenv + )) } - for(n in colnames(mm$y)) { - do.call(delayedAssign,list(n,bquote(booty[,.(n)]), - eval.env=bootenv, - assign.env=bootenv)) + for (n in colnames(mm$y)) { + do.call(delayedAssign, list(n, bquote(booty[, .(n)]), + eval.env = bootenv, + assign.env = bootenv + )) } - if(!is.null(mm$ivy)) { + if (!is.null(mm$ivy)) { # it's an IV-estimation, make provisions for using the OLS-version - delayedAssign('olsmms', - { - if(FALSE) s <- NULL # avoid warnings about undefined s - # make the s by evaluating mms -# mms - mm1 <- list() - mm1$extra <- mm$extra - mm1$extra$ivform <- NULL - mm1$x <- cbind(mm$x[s,,drop=FALSE],mm$ivy[s,,drop=FALSE]) - mm1$y <- mm$y[s,,drop=FALSE] - if(!is.null(mm$ctrl)) mm1$ctrl <- mm$ctrl[s,,drop=FALSE] - if(!is.null(mm$fl)) mm1$fl <- lapply(mm$fl,function(f) factor(f[s])) - if(!is.null(weights)) mm1$weights <- mm$weights[s] - mmdemean(mm1) - }, - eval.env=bootenv, - assign.env=bootenv) - - delayedAssign('ols', - felm.mm(olsmms,bootstat,exactDOF,keepX,keepCX,keepModel,onlyse=onlyse,psdef=psdef), - eval.env=bootenv, - assign.env=bootenv) - - for(n in colnames(mm$ivy)) { - do.call(delayedAssign, list(n,bquote(bootivy[,.(n)]), - eval.env=bootenv, - assign.env=bootenv)) - } + delayedAssign("olsmms", + { + if (FALSE) s <- NULL # avoid warnings about undefined s + # make the s by evaluating mms + # mms + mm1 <- list() + mm1$extra <- mm$extra + mm1$extra$ivform <- NULL + mm1$x <- cbind(mm$x[s, , drop = FALSE], mm$ivy[s, , drop = FALSE]) + mm1$y <- mm$y[s, , drop = FALSE] + if (!is.null(mm$ctrl)) mm1$ctrl <- mm$ctrl[s, , drop = FALSE] + if (!is.null(mm$fl)) mm1$fl <- lapply(mm$fl, function(f) factor(f[s])) + if (!is.null(weights)) mm1$weights <- mm$weights[s] + mmdemean(mm1) + }, + eval.env = bootenv, + assign.env = bootenv + ) + delayedAssign("ols", + felm.mm(olsmms, bootstat, exactDOF, keepX, keepCX, keepModel, onlyse = onlyse, psdef = psdef), + eval.env = bootenv, + assign.env = bootenv + ) + + for (n in 
colnames(mm$ivy)) { + do.call(delayedAssign, list(n, bquote(bootivy[, .(n)]), + eval.env = bootenv, + assign.env = bootenv + )) + } } eval(bootexpr, bootenv) } z$boot <- replicate(Nboot, bootfun()) - } + } z } -felm.mm <- function(mm,nostats,exactDOF,keepX,keepCX,keepModel,kclass=NULL,fuller=NULL,onlyse=FALSE,psdef=FALSE) { +felm.mm <- function(mm, nostats, exactDOF, keepX, keepCX, keepModel, kclass = NULL, fuller = NULL, onlyse = FALSE, psdef = FALSE) { ivform <- mm$ivform - if(is.null(ivform)) { + if (is.null(ivform)) { # no iv, just do the thing - z <- newols(mm, nostats=nostats[1], exactDOF=exactDOF, onlyse=onlyse,psdef=psdef) - if(keepX) z$X <- if(is.null(mm$orig)) mm$x else mm$orig$x - if(keepCX) {z$cX <- mm$x; z$cY <- mm$y} - if(keepModel) z$model <- mm$model else z$model <- mm$mfcall + z <- newols(mm, nostats = nostats[1], exactDOF = exactDOF, onlyse = onlyse, psdef = psdef) + if (keepX) z$X <- if (is.null(mm$orig)) mm$x else mm$orig$x + if (keepCX) { + z$cX <- mm$x + z$cY <- mm$y + } + if (keepModel) z$model <- mm$model else z$model <- mm$mfcall z$call <- match.call() return(z) } - if(length(nostats) == 2) - nost1 <- nostats[2] - else - nost1 <- nostats[1] + if (length(nostats) == 2) { + nost1 <- nostats[2] + } else { + nost1 <- nostats[1] + } ########### Instrumental variables ############ fitnames <- makefitnames(colnames(mm$ivy)) # should we do k-class estimation? 
- if(is.null(kclass) || is.numeric(kclass)) { - kappa <- kclass + if (is.null(kclass) || is.numeric(kclass)) { + kappa <- kclass } else { KN <- ncol(mm$ivx) LN <- ncol(mm$x) N <- nrow(mm$x) # todo: liml - + kappa <- switch(kclass, - `2sls`=, - tsls=1.0, - nagar=1+(KN-2)/N, - b2sls=, - btsls=1/(1-(KN-2)/N), - mb2sls=, - mbtsls=(1-LN/N)/(1-KN/N-LN/N), - liml=limlk(mm), - fuller=limlk(mm)-fuller/(N-KN), - stop('Unknown k-class: ',kclass,call.=FALSE)) - if(identical(kclass,'liml') && fuller != 0) - kappa <- kappa - fuller/(N-KN) + `2sls` = , + tsls = 1.0, + nagar = 1 + (KN - 2) / N, + b2sls = , + btsls = 1 / (1 - (KN - 2) / N), + mb2sls = , + mbtsls = (1 - LN / N) / (1 - KN / N - LN / N), + liml = limlk(mm), + fuller = limlk(mm) - fuller / (N - KN), + stop("Unknown k-class: ", kclass, call. = FALSE) + ) + if (identical(kclass, "liml") && fuller != 0) { + kappa <- kappa - fuller / (N - KN) + } } # if k-class, we should add all the exogenous variables # to the lhs in the 1st stage, and obtain all the residuals # of the instruments. A fraction (1-kappa) of the residuals # are added to the fitted values when doing the 2nd stage. # nah, we should project on P_{Z,W}. 
Now, P_{Z,W} W = W - if(!is.null(kappa)) { - mm2 <- mm1 <- mm[names(mm) %in% c('fl','terms','cluster', 'numctrl', - 'hasicpt','na.action','contrasts', - 'weights')] + if (!is.null(kappa)) { + mm2 <- mm1 <- mm[names(mm) %in% c( + "fl", "terms", "cluster", "numctrl", + "hasicpt", "na.action", "contrasts", + "weights" + )] nmx <- colnames(mm$x) mm1$y <- cbind(mm$x, mm$ivy) mm1$x <- cbind(mm$x, mm$ivx) @@ -1408,16 +1506,19 @@ felm.mm <- function(mm,nostats,exactDOF,keepX,keepCX,keepModel,kclass=NULL,fulle mm2$x <- mm1$y mm2$orig$x <- cbind(mm$orig$x, mm$orig$ivx) mm2$orig$y <- cbind(mm$orig$y, mm$orig$ivy) -# rm(mm) - z1 <- newols(mm1, nostats=nost1, onlyse=onlyse,psdef=psdef) + # rm(mm) + z1 <- newols(mm1, nostats = nost1, onlyse = onlyse, psdef = psdef) mm2$noinst <- z1$residuals rm(mm1) -# setdimnames(mm2$x, list(NULL, c(fitnames,nmx))) - z2 <- newols(mm2, exactDOF=exactDOF, kappa=kappa, nostats=nostats[1], onlyse=onlyse, psdef=psdef) - if(keepX) z2$X <- if(is.null(mm2$orig)) mm2$x else mm2$orig$x - if(keepCX) {z2$cX <- mm2$x; z2$cY <- mm2$y} - if(keepModel) z2$model <- mm$model else z2$model <- mm$mfcall + # setdimnames(mm2$x, list(NULL, c(fitnames,nmx))) + z2 <- newols(mm2, exactDOF = exactDOF, kappa = kappa, nostats = nostats[1], onlyse = onlyse, psdef = psdef) + if (keepX) z2$X <- if (is.null(mm2$orig)) mm2$x else mm2$orig$x + if (keepCX) { + z2$cX <- mm2$x + z2$cY <- mm2$y + } + if (keepModel) z2$model <- mm$model else z2$model <- mm$mfcall z2$call <- match.call() return(z2) } @@ -1430,32 +1531,37 @@ felm.mm <- function(mm,nostats,exactDOF,keepX,keepCX,keepModel,kclass=NULL,fulle # in the first stage we should have the iv left hand side on # the lhs, the exogenous and instruments on the rhs. # mm$x is an ok rhs. 
The y must be replaced by the ivy - + ivars <- colnames(mm$ivx) exlist <- colnames(mm$x) - mm1 <- mm[names(mm) %in% c('fl','terms','cluster', 'numctrl', - 'hasicpt','na.action','contrasts', - 'weights')] + mm1 <- mm[names(mm) %in% c( + "fl", "terms", "cluster", "numctrl", + "hasicpt", "na.action", "contrasts", + "weights" + )] mm1$y <- mm$ivy mm1$x <- cbind(mm$x, mm$ivx) mm1$orig$y <- mm$orig$ivy mm1$orig$x <- cbind(mm$orig$x, mm$orig$ivx) - z1 <- newols(mm1, nostats=nost1, exactDOF=exactDOF, onlyse=onlyse, psdef=psdef) - if(keepX) z1$X <- if(is.null(mm1$orig)) mm1$x else mm1$orig$x - if(keepCX) {z1$cX <- mm1$x; z1$cY <- mm1$y} + z1 <- newols(mm1, nostats = nost1, exactDOF = exactDOF, onlyse = onlyse, psdef = psdef) + if (keepX) z1$X <- if (is.null(mm1$orig)) mm1$x else mm1$orig$x + if (keepCX) { + z1$cX <- mm1$x + z1$cY <- mm1$y + } rm(mm1) - if(!nost1) { - z1$iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1,ivars, lhs=lh)) + if (!nost1) { + z1$iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1, ivars, lhs = lh)) names(z1$iv1fstat) <- z1$lhs - z1$rob.iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1,ivars,type='robust', lhs=lh)) + z1$rob.iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1, ivars, type = "robust", lhs = lh)) names(z1$rob.iv1fstat) <- z1$lhs } z1$instruments <- ivars z1$centred.exo <- mm$x - z1$ivx <- mm$ivx + z1$ivx <- mm$ivx z1$ivy <- mm$ivy # then second stage. 
This is a bit more manipulation @@ -1463,25 +1569,57 @@ felm.mm <- function(mm,nostats,exactDOF,keepX,keepCX,keepModel,kclass=NULL,fulle # the exogenous variables plus the the predicted endogenous from z1 on the rhs # we must set the names of the exogenous variables - mm2 <- mm[names(mm) %in% c('fl','terms','cluster','numctrl','hasicpt', - 'na.action','contrasts', 'TSS','weights')] + mm2 <- mm[names(mm) %in% c( + "fl", "terms", "cluster", "numctrl", "hasicpt", + "na.action", "contrasts", "TSS", "weights" + )] mm2$x <- cbind(mm$x, z1$c.fitted.values) - setdimnames(mm2$x, list(NULL, c(exlist,fitnames))) + setdimnames(mm2$x, list(NULL, c(exlist, fitnames))) mm2$y <- mm$y - if(!is.null(mm$orig)) { - mm2$orig <- list(x=cbind(mm$orig$x, z1$fitted.values), y=mm$orig$y) - setdimnames(mm2$orig$x, list(NULL, c(exlist,fitnames))) + if (!is.null(mm$orig)) { + mm2$orig <- list(x = cbind(mm$orig$x, z1$fitted.values), y = mm$orig$y) + setdimnames(mm2$orig$x, list(NULL, c(exlist, fitnames))) } -# rm(mm) # save some memory + # rm(mm) # save some memory - z2 <- newols(mm2, stage1=z1, nostats=nostats[1], exactDOF=exactDOF, kappa=kappa, onlyse=onlyse, psdef=psdef) - if(keepX) z2$X <- if(is.null(mm2$orig)) mm2$x else mm2$orig$x - if(keepCX) {z2$cX <- mm2$x; z2$cY <- mm2$y} - if(keepModel) z2$model <- mm$model else z2$model <- mm$mfcall + z2 <- newols(mm2, stage1 = z1, nostats = nostats[1], exactDOF = exactDOF, kappa = kappa, onlyse = onlyse, psdef = psdef) + if (keepX) z2$X <- if (is.null(mm2$orig)) mm2$x else mm2$orig$x + if (keepCX) { + z2$cX <- mm2$x + z2$cY <- mm2$y + } + if (keepModel) z2$model <- mm$model else z2$model <- mm$mfcall rm(mm2) z2$call <- match.call() z2 } + +#' Check if formula contains redundant FEs +#' +#' Print warning when there is something like fe1+fe1:fe2 +#' @param formula a formula +#' @examples +#' check_redundant_fe(y ~ x) +#' check_redundant_fe(y ~ x | fe1 + fe2) +#' check_redundant_fe(y ~ x | fe1 + fe2:fe1) +#' check_redundant_fe(y ~ x | fe2 * fe1) +#' 
@author Grant McDermott, small adaptation by Matthieu Stigler +#' @noRd +check_redundant_fe <- function(formula) { + fml_chk <- Formula::Formula(formula) + has_FE <- length(attr(fml_chk, "rhs")) > 1 && !is.null(attr(fml_chk, "rhs")[[2]]) + if (has_FE) { + fes_chk <- attr(terms(formula(fml_chk, rhs = 2)), "term.labels") + if (any(duplicated(unlist(strsplit(fes_chk, ":"))))) { + warning(paste( + "Duplicated terms detected in the fixed effects slot.", + "If you are interacting factor variables, consider excluding", + "the parents terms, since these strictly nest the interactions", + "and are thus redundant. See ?felm 'Details'.\n" + )) + } + } +} diff --git a/R/felm.old.R b/R/felm.old.R index 94cc96f..f3bbd8b 100644 --- a/R/felm.old.R +++ b/R/felm.old.R @@ -1,130 +1,133 @@ # $Id: felm.old.R 1655 2015-03-18 18:51:06Z sgaure $ -felm.old <- function(formula,fl,data) { +felm.old <- function(formula, fl, data) { mf <- match.call(expand.dots = FALSE) - if(missing(fl)) { + if (missing(fl)) { # we should rather parse the formula tree # find the terms involving G - trm <- terms(formula,special='G') - feidx <- attr(trm,'specials')$G-1 - festr <- paste(labels(trm)[feidx],collapse='+') + trm <- terms(formula, special = "G") + feidx <- attr(trm, "specials")$G - 1 + festr <- paste(labels(trm)[feidx], collapse = "+") - if(festr == '') stop('No factors specified') + if (festr == "") stop("No factors specified") # remove the G-terms from formula - formula <- update(formula,paste('. ~ . -(',festr,')')) - mf[['formula']] <- formula + formula <- update(formula, paste(". ~ . 
-(", festr, ")")) + mf[["formula"]] <- formula # then make a list of them, and find their names - felist <- parse(text=paste('list(',gsub('+',',',festr,fixed=TRUE),')',sep='')) - nm <- eval(felist,list(G=function(t) as.character(substitute(t)))) + felist <- parse(text = paste("list(", gsub("+", ",", festr, fixed = TRUE), ")", sep = "")) + nm <- eval(felist, list(G = function(t) as.character(substitute(t)))) # collapse them in case there's an interaction with a funny name - nm <- lapply(nm,paste,collapse='.') - + nm <- lapply(nm, paste, collapse = ".") + # replace G with as.factor, eval with this, and the parent frame, or with data # allow interaction factors with '*' - iact <- function(a,b) interaction(a,b,drop=TRUE) - if(missing(data)) - fl <- eval(felist,list(G=as.factor,'*'=iact)) - else { + iact <- function(a, b) interaction(a, b, drop = TRUE) + if (missing(data)) { + fl <- eval(felist, list(G = as.factor, "*" = iact)) + } else { G <- as.factor - fl <- local({'*'<-iact;eval(felist,data,environment())}) + fl <- local({ + "*" <- iact + eval(felist, data, environment()) + }) } gc() names(fl) <- nm } else { -# warning('The fl-argument is obsolete') + # warning('The fl-argument is obsolete') } - if(!is.list(fl)) stop('need at least one factor') - fl <- lapply(fl,as.factor) - if(is.null(names(fl))) names(fl) <- paste('fe',1:length(fl),sep='') + if (!is.list(fl)) stop("need at least one factor") + fl <- lapply(fl, as.factor) + if (is.null(names(fl))) names(fl) <- paste("fe", 1:length(fl), sep = "") -# mf <- match.call(expand.dots = FALSE) + # mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data"), names(mf), 0L) mf <- mf[c(1L, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") mf <- eval(mf, parent.frame()) - mt <- attr(mf,'terms') + mt <- attr(mf, "terms") - y <- model.response(mf,'numeric') + y <- model.response(mf, "numeric") -# try a sparse model matrix to save memory when removing intercept -# though, demeanlist must be full. 
Ah, no, not much to save because -# it won't be sparse after centering -# we should rather let demeanlist remove the intercept, this -# will save memory by not copying. But we need to remove it below in x %*% beta -# (or should we extend beta with a zero at the right place, it's only -# a vector, eh, is it, do we not allow matrix lhs? No.) + # try a sparse model matrix to save memory when removing intercept + # though, demeanlist must be full. Ah, no, not much to save because + # it won't be sparse after centering + # we should rather let demeanlist remove the intercept, this + # will save memory by not copying. But we need to remove it below in x %*% beta + # (or should we extend beta with a zero at the right place, it's only + # a vector, eh, is it, do we not allow matrix lhs? No.) - x <- model.matrix(mt,mf) + x <- model.matrix(mt, mf) rm(mf) icpt <- 0 - icpt <- which(attr(x,'assign') == 0) - if(length(icpt) == 0) icpt <- 0 + icpt <- which(attr(x, "assign") == 0) + if (length(icpt) == 0) icpt <- 0 ncov <- ncol(x) - (icpt > 0) - if(ncov == 0) { + if (ncov == 0) { # No covariates - fr <- demeanlist(y,fl) - z <- list(r.residuals=y,fe=fl,p=0,cfactor=compfactor(fl),residuals=fr,call=match.call()) - class(z) <- 'felm' + fr <- demeanlist(y, fl) + z <- list(r.residuals = y, fe = fl, p = 0, cfactor = compfactor(fl), residuals = fr, call = match.call()) + class(z) <- "felm" return(z) } # here we need to demean things - dm <- demeanlist(list(y=y,x=x),fl,icpt) + dm <- demeanlist(list(y = y, x = x), fl, icpt) yz <- dm[[1]] xz <- dm[[2]] rm(dm) gc() - badconv <- attr(xz,'badconv') + attr(yz,'badconv') - dim(xz) <- c(nrow(x),ncov) + badconv <- attr(xz, "badconv") + attr(yz, "badconv") + dim(xz) <- c(nrow(x), ncov) attributes(yz) <- attributes(y) -# here we just do an lm.fit, however lm.fit is quite slow since -# it doesn't use blas (in particular it can't use e.g. threaded blas in acml) -# so we have rolled our own. 
+ # here we just do an lm.fit, however lm.fit is quite slow since + # it doesn't use blas (in particular it can't use e.g. threaded blas in acml) + # so we have rolled our own. -# we really don't return an 'lm' object or other similar stuff, so -# we should consider using more elementary operations which map to blas-3 -# eg. solve(crossprod(xz),t(xz) %*% yz) -# Or, even invert by solve(crossprod(xz)) since we need -# the diagonal for standard errors. We could use the cholesky inversion -# chol2inv(chol(crossprod(xz))) + # we really don't return an 'lm' object or other similar stuff, so + # we should consider using more elementary operations which map to blas-3 + # eg. solve(crossprod(xz),t(xz) %*% yz) + # Or, even invert by solve(crossprod(xz)) since we need + # the diagonal for standard errors. We could use the cholesky inversion + # chol2inv(chol(crossprod(xz))) cp <- crossprod(xz) ch <- cholx(cp) -# ch <- chol(cp) -# beta <- drop(inv %*% (t(xz) %*% yz)) + # ch <- chol(cp) + # beta <- drop(inv %*% (t(xz) %*% yz)) # remove multicollinearities - badvars <- attr(ch,'badvars') - b <- crossprod(xz,yz) + badvars <- attr(ch, "badvars") + b <- crossprod(xz, yz) - if(is.null(badvars)) { - beta <- as.vector(backsolve(ch,backsolve(ch,b,transpose=TRUE))) + if (is.null(badvars)) { + beta <- as.vector(backsolve(ch, backsolve(ch, b, transpose = TRUE))) inv <- chol2inv(ch) } else { - beta <- rep(NaN,nrow(cp)) - beta[-badvars] <- backsolve(ch,backsolve(ch,b[-badvars],transpose=TRUE)) - inv <- matrix(NaN,nrow(cp),ncol(cp)) - inv[-badvars,-badvars] <- chol2inv(ch) + beta <- rep(NaN, nrow(cp)) + beta[-badvars] <- backsolve(ch, backsolve(ch, b[-badvars], transpose = TRUE)) + inv <- matrix(NaN, nrow(cp), ncol(cp)) + inv[-badvars, -badvars] <- chol2inv(ch) } rm(b) - if(icpt > 0) names(beta) <- colnames(x)[-icpt] else names(beta) <- colnames(x) -# cat(date(),'projected system finished\n') - z <- list(coefficients=beta,badconv=badconv) + if (icpt > 0) names(beta) <- colnames(x)[-icpt] 
else names(beta) <- colnames(x) + # cat(date(),'projected system finished\n') + z <- list(coefficients = beta, badconv = badconv) N <- nrow(xz) p <- ncol(xz) - length(badvars) -# how well would we fit with all the dummies? -# the residuals of the centered model equals the residuals -# of the full model, thus we may compute the fitted values -# resulting from the full model. - zfit <- xz %*% ifelse(is.na(beta),0,beta) + # how well would we fit with all the dummies? + # the residuals of the centered model equals the residuals + # of the full model, thus we may compute the fitted values + # resulting from the full model. + zfit <- xz %*% ifelse(is.na(beta), 0, beta) rm(xz) zresid <- yz - zfit @@ -132,42 +135,42 @@ felm.old <- function(formula,fl,data) { z$fitted.values <- y - zresid z$residuals <- zresid # insert a zero at the intercept position - if(length(fl) > 0) { - if(icpt > 0) ibeta <- append(beta,0,after=icpt-1) else ibeta <- beta - pred <- x %*% ifelse(is.na(ibeta),0,ibeta) + if (length(fl) > 0) { + if (icpt > 0) ibeta <- append(beta, 0, after = icpt - 1) else ibeta <- beta + pred <- x %*% ifelse(is.na(ibeta), 0, ibeta) z$r.residuals <- y - pred } else { z$r.residuals <- zresid } -# z$xb <- pred + # z$xb <- pred rm(x) rm(y) gc() z$cfactor <- compfactor(fl) - numrefs <- nlevels(z$cfactor) + max(length(fl)-2,0) - numdum <- sum(unlist(lapply(fl,nlevels))) - numrefs + numrefs <- nlevels(z$cfactor) + max(length(fl) - 2, 0) + numdum <- sum(unlist(lapply(fl, nlevels))) - numrefs z$numrefs <- numrefs -# if(length(fl) <= 2) { -# numdum <- sum(unlist(lapply(fl,nlevels))) - nlevels(z$cfactor) -# } else { -# numdum <- sum(unlist(lapply(fl,nlevels))) - length(fl) + 1 -# } + # if(length(fl) <= 2) { + # numdum <- sum(unlist(lapply(fl,nlevels))) - nlevels(z$cfactor) + # } else { + # numdum <- sum(unlist(lapply(fl,nlevels))) - length(fl) + 1 + # } z$df <- N - p - numdum - vcvfactor <- sum(z$residuals**2)/z$df + vcvfactor <- sum(z$residuals**2) / z$df z$vcv <- inv * vcvfactor 
z$se <- sqrt(diag(z$vcv)) z$sefactor <- sqrt(vcvfactor) - z$tval <- z$coefficients/z$se - z$pval <- 2*pt(abs(z$tval),z$df,lower.tail=FALSE) + z$tval <- z$coefficients / z$se + z$pval <- 2 * pt(abs(z$tval), z$df, lower.tail = FALSE) z$terms <- mt z$fe <- fl z$N <- N z$p <- p + numdum z$xp <- p z$call <- match.call() - class(z) <- 'felm' + class(z) <- "felm" return(z) } diff --git a/R/fixedse.R b/R/fixedse.R index 9fef16e..9f2ac84 100644 --- a/R/fixedse.R +++ b/R/fixedse.R @@ -1,96 +1,101 @@ #' Compute standard errors for fixed effects #' #' fixedse computes the standard errors for the fixed effects when there is only one. -#' While \code{\link{getfe}} can provide standard errors, it does so by bootstrapping +#' While [getfe()] can provide standard errors, it does so by bootstrapping #' for general estimable functions. In the special case that there's only a single fixed #' effect, and the estimable function is just the levels, this function can be used to -#' compute the fixed effects without bootstrapping. It requires that \code{\link{felm}} +#' compute the fixed effects without bootstrapping. It requires that [felm()] #' is run with keepX=TRUE. #' @name fixedse -#' @param est 'felm' object. The result of a call to \code{\link{felm}}. +#' @param est 'felm' object. The result of a call to [felm()]. #' @param E Matrix. Estimable function. Not used at the moment. #' @param lhs character. Name of the left hand side, if more than one. #' @return numeric. Vector of standard errors. 
#' @examples #' x <- rnorm(1000) -#' f <- factor(sample(5,1000,replace=TRUE)) +#' f <- factor(sample(5, 1000, replace = TRUE)) #' y <- x + (1:5)[f] + rnorm(1000) -#' est <- felm(y ~ x | f, keepX=TRUE) -#' #both bootstrap and computed se: -#' cbind(getfe(est,ef=efactory(est,'ref'),se=TRUE), fse=fixedse(est)) -#' #compare with lm: -#' summary(lm(y ~x+f-1)) +#' est <- felm(y ~ x | f, keepX = TRUE) +#' # both bootstrap and computed se: +#' cbind(getfe(est, ef = efactory(est, "ref"), se = TRUE), fse = fixedse(est)) +#' # compare with lm: +#' summary(lm(y ~ x + f - 1)) #' @export #' @keywords internal #' @importFrom Matrix colSums rowSums solve -fixedse <- function(est, lhs=NULL, E) { - if(length(est$fe) == 0) return(numeric()) - if(!is.null(est$clustervar)) - stop('fixedse() does not work with clustered standard errors') - s2 <- sum(residuals(est,lhs=lhs)^2)/df.residual(est) - - if(length(est$fe) != 1) { - stop('fixedse() only works for a single fixed effect') - #Now, we must have ef an estimable function - if(missing(E) || !inherits(E,'Matrix')) stop('Must specify estimable function with more than one factor') +fixedse <- function(est, lhs = NULL, E) { + if (length(est$fe) == 0) { + return(numeric()) + } + if (!is.null(est$clustervar)) { + stop("fixedse() does not work with clustered standard errors") + } + s2 <- sum(residuals(est, lhs = lhs)^2) / df.residual(est) + + if (length(est$fe) != 1) { + stop("fixedse() only works for a single fixed effect") + # Now, we must have ef an estimable function + if (missing(E) || !inherits(E, "Matrix")) stop("Must specify estimable function with more than one factor") D <- makeDmatrix(est$fe) DE <- D %*% E dd <- crossprod(DE) dig <- diag(solve(dd)) -# dig <- sampdiag(dd,eps,ncol(E)) + # dig <- sampdiag(dd,eps,ncol(E)) DX <- crossprod(DE, est$X) - cvc <- chol(vcov(est,lhs=lhs)) - return(structure(sqrt(s2*dig + colSums(tcrossprod(cvc, solve(dd,DX))^2)), - names=colnames(E))) - + cvc <- chol(vcov(est, lhs = lhs)) + 
return(structure(sqrt(s2 * dig + colSums(tcrossprod(cvc, solve(dd, DX))^2)), + names = colnames(E) + )) } f <- est$fe[[1]] # Use the Woodbury identity on (D' M_X D)^{-1} = (D' (I-X(X'X)^{-1}X')) D)^{-1} # to obtain (D'D)^{-1}(D'D + D'X(X' M_D X)^{-1})X'D) (D'D)^{-1} # where (X' M_D X)^{-1} is the unscaled vcov(est) - if(is.null(est$X) || ncol(est$X) == 0) { - sqrt(s2/as.vector(table(f))) + if (is.null(est$X) || ncol(est$X) == 0) { + sqrt(s2 / as.vector(table(f))) } else { - sqrt(s2/as.vector(table(f)) + colSums(tcrossprod(chol(vcov(est,lhs=lhs)), - Crowsum(est$X,f,mean=TRUE))^2)) + sqrt(s2 / as.vector(table(f)) + colSums(tcrossprod( + chol(vcov(est, lhs = lhs)), + crowsum(est$X, f, mean = TRUE) + )^2)) } } -sampdiag <- function(A, eps=0.01, K) { +sampdiag <- function(A, eps = 0.01, K) { # Start with 10 vectors NN <- 0 N <- 100 - if(is.matrix(A) || inherits(A,'Matrix')) - K <- ncol(A) - else if(missing(K)) - stop('Must specify K=ncol(A) when A is function') + if (is.matrix(A) || inherits(A, "Matrix")) { + K <- ncol(A) + } else if (missing(K)) { + stop("Must specify K=ncol(A) when A is function") + } - meansd <- 2*eps + meansd <- 2 * eps sums <- sqsums <- numeric(K) - cgeps <- sqrt(K)*eps/10 - while(meansd > eps) { - message('meanSD = ',meansd, ' ', NN, ' new N is ',N) - N <- as.integer(max(1,min(1024*1024*getOption('lfe.bootmem')/(3*K*8),N))) -# N <- min(N,500L) - vv <- matrix(sample(c(1,-1),N*K,replace=TRUE), K) - vAv <- vv * solve(A,vv) -# vAv <- vv*cgsolve(A, vv, cgeps) + cgeps <- sqrt(K) * eps / 10 + while (meansd > eps) { + message("meanSD = ", meansd, " ", NN, " new N is ", N) + N <- as.integer(max(1, min(1024 * 1024 * getOption("lfe.bootmem") / (3 * K * 8), N))) + # N <- min(N,500L) + vv <- matrix(sample(c(1, -1), N * K, replace = TRUE), K) + vAv <- vv * solve(A, vv) + # vAv <- vv*cgsolve(A, vv, cgeps) rm(vv) sums <- sums + rowSums(vAv) sqsums <- sqsums + rowSums(vAv^2) rm(vAv) NN <- NN + N - sd <- sqrt((sqsums - sums^2/NN)/(NN-1))/sqrt(NN) - res <- sums/NN - 
relsd <- sd/(0+abs(res)) + sd <- sqrt((sqsums - sums^2 / NN) / (NN - 1)) / sqrt(NN) + res <- sums / NN + relsd <- sd / (0 + abs(res)) print(fivenum(relsd)) - meansd <- max(relsd) #max(relsd[relsd > eps]) -# meansd <- max(abs(sd)) - if(is.na(meansd)) break + meansd <- max(relsd) # max(relsd[relsd > eps]) + # meansd <- max(abs(sd)) + if (is.na(meansd)) break # How many Ns do we need to reach eps? - N <- NN*((meansd/eps)^2-1) + N <- NN * ((meansd / eps)^2 - 1) } - message('NN is ',NN) + message("NN is ", NN) res } - diff --git a/R/generics.R b/R/generics.R index db8866b..897f471 100644 --- a/R/generics.R +++ b/R/generics.R @@ -37,146 +37,173 @@ nobs.felm <- function(object, ...) { #' @method print felm #' @export -print.felm <- function(x,digits=max(3,getOption('digits')-3),...) { +print.felm <- function(x, digits = max(3, getOption("digits") - 3), ...) { z <- x - if(z$p == 0) { - cat('(No coefficients)\n') + if (z$p == 0) { + cat("(No coefficients)\n") return() } - print(coef(x),digits=digits,...) + print(coef(x), digits = digits, ...) } -#fixef.felm <- function(object,...) { +# fixef.felm <- function(object,...) { # fe <- getfe(object,...) 
# f <- fe[,'fe'] # l <- lapply(levels(f),function(n) {v <- fe[f == n,'effect']; names(v) <- as.character(fe[f==n,'idx']); v}) # names(l) <- levels(f) # l -#} +# } #' @method coef felm #' @export -coef.felm <- function(object, ..., lhs=NULL) { - if(is.null(lhs)) { - if(is.null(object$coefficients) || ncol(object$coefficients) == 1) - return({ - r <- as.vector(object$coefficients) - names(r) <- rownames(object$coefficients) - if(is.null(names(r))) names(r) <- names(object$coefficients) - r}) +coef.felm <- function(object, ..., lhs = NULL) { + if (is.null(lhs)) { + if (is.null(object$coefficients) || ncol(object$coefficients) == 1) { + return({ + r <- as.vector(object$coefficients) + names(r) <- rownames(object$coefficients) + if (is.null(names(r))) names(r) <- names(object$coefficients) + r + }) + } object$coefficients } else { -# if(anyNA(match(lhs, colnames(object$coefficients)))) - if(any(!(lhs %in% colnames(object$coefficients)))) - stop('Please specify lhs as one of ',paste(object$lhs, collapse=',')) - object$coefficients[,lhs,drop=FALSE] + # if(anyNA(match(lhs, colnames(object$coefficients)))) + if (any(!(lhs %in% colnames(object$coefficients)))) { + stop("Please specify lhs as one of ", paste(object$lhs, collapse = ",")) + } + object$coefficients[, lhs, drop = FALSE] } } #' Compute Sargan's S #' -#' @param object and object type '"felm"', the return value from \code{\link{felm}}. -#' @param lhs in case of multiple left hand sides, specify the name of the left +#' @param object an object of type '"felm"', the return value from [felm()]. +#' @param lhs in case of multiple left hand sides, specify the name of the left #' hand side for which you want to compute Sargan's S. #' @param ... Not used at the moment. -#' @return \code{sargan} returns a numeric, the Sargan's S. The Basmann statistic is +#' @return `sargan` returns a numeric, the Sargan's S. The Basmann statistic is #' returned in the '"basmann"' attribute.
#' @export -sargan <- function(object, ..., lhs=object$lhs[1]) { - # Sargan's S. - # Let u be the sample residuals from the 2. stage. Regress these on the instruments - # This yields a new set of residuals e. - # Sargan's S is S = N * (1-sum e^2/sum u^2) - if(any(!(lhs %in% colnames(object$coefficients)))) - stop('Please specify lhs as one of ',paste(object$lhs, collapse=',')) - resid <- object$residuals[,lhs,drop=FALSE] - mm <- list(y=resid,x=object$stage1$ivx) - ols <- newols(mm,nostats=TRUE) - if(is.null(object$weights)) w <- 1 else w <- object$weights^2 - S <- object$N * (1 - sum(w*ols$residuals^2)/sum(w*resid^2)) - return(structure(S,basmann=S*(object$N-length(ncol(mm$x)))/(object$N-S))) +sargan <- function(object, ..., lhs = object$lhs[1]) { + # Sargan's S. + # Let u be the sample residuals from the 2. stage. Regress these on the instruments + # This yields a new set of residuals e. + # Sargan's S is S = N * (1-sum e^2/sum u^2) + if (any(!(lhs %in% colnames(object$coefficients)))) { + stop("Please specify lhs as one of ", paste(object$lhs, collapse = ",")) + } + resid <- object$residuals[, lhs, drop = FALSE] + mm <- list(y = resid, x = object$stage1$ivx) + ols <- newols(mm, nostats = TRUE) + if (is.null(object$weights)) w <- 1 else w <- object$weights^2 + S <- object$N * (1 - sum(w * ols$residuals^2) / sum(w * resid^2)) + return(structure(S, basmann = S * (object$N - length(ncol(mm$x))) / (object$N - S))) } #' @method residuals felm #' @export -residuals.felm <- function(object, ..., lhs=NULL) { - if(is.null(lhs)) { +residuals.felm <- function(object, ..., lhs = NULL) { + if (is.null(lhs)) { object$residuals } else { -# if(anyNA(match(lhs, colnames(object$coefficients)))) - if(any(!(lhs %in% colnames(object$coefficients)))) - stop('Please specify lhs as one of ',paste(object$lhs, collapse=',')) - object$residuals[,lhs,drop=FALSE] + # if(anyNA(match(lhs, colnames(object$coefficients)))) + if (any(!(lhs %in% colnames(object$coefficients)))) { + stop("Please 
specify lhs as one of ", paste(object$lhs, collapse = ",")) + } + object$residuals[, lhs, drop = FALSE] } } #' @method vcov felm #' @export -vcov.felm <- function(object,..., type=NULL, lhs=NULL) { -# if(is.na(match(type[1], c('iid', 'robust', 'cluster')))) - if(is.null(type)) type <- if(is.null(object$clustervar)) { - if(getOption('lfe.robust')) 'robust' else 'iid' - } else 'cluster' - if(!(type[1] %in% c('iid', 'robust', 'cluster'))) - stop("specify vcov-type as 'iid', 'robust' or 'cluster'") - - if(is.null(lhs) && length(object$lhs) > 1) { - stop('Please specify which lhs to retrieve vcov for with vcov(...,lhs=[one of ', - paste(object$lhs,collapse=','),'])') - } - if(is.null(lhs) || length(object$lhs) == 1) { - if(type[1] == 'iid') return(object$vcv) - if(type[1] == 'robust') return(object$robustvcv) - if(type[1] == 'cluster') return(object$clustervcv) - } - - if(is.na(match(lhs, object$lhs))) { - stop('Please specify which lhs to retrieve vcov for with vcov(...,lhs=[one of ', - paste(object$lhs,collapse=','),'])') - } - if(type[1] == 'iid') return(object$STATS[[lhs]]$vcv) - if(type[1] == 'robust') return(object$STATS[[lhs]]$robustvcv) - if(type[1] == 'cluster') return(object$STATS[[lhs]]$clustervcv) +vcov.felm <- function(object, ..., type = NULL, lhs = NULL) { + # if(is.na(match(type[1], c('iid', 'robust', 'cluster')))) + if (is.null(type)) { + type <- if (is.null(object$clustervar)) { + if (getOption("lfe.robust")) "robust" else "iid" + } else { + "cluster" + } + } + if (!(type[1] %in% c("iid", "robust", "cluster"))) { + stop("specify vcov-type as 'iid', 'robust' or 'cluster'") + } + + if (is.null(lhs) && length(object$lhs) > 1) { + stop( + "Please specify which lhs to retrieve vcov for with vcov(...,lhs=[one of ", + paste(object$lhs, collapse = ","), "])" + ) + } + if (is.null(lhs) || length(object$lhs) == 1) { + if (type[1] == "iid") { + return(object$vcv) + } + if (type[1] == "robust") { + return(object$robustvcv) + } + if (type[1] == "cluster") { + 
return(object$clustervcv) + } + } + + if (is.na(match(lhs, object$lhs))) { + stop( + "Please specify which lhs to retrieve vcov for with vcov(...,lhs=[one of ", + paste(object$lhs, collapse = ","), "])" + ) + } + if (type[1] == "iid") { + return(object$STATS[[lhs]]$vcv) + } + if (type[1] == "robust") { + return(object$STATS[[lhs]]$robustvcv) + } + if (type[1] == "cluster") { + return(object$STATS[[lhs]]$clustervcv) + } } #' @method confint felm #' @export -confint.felm <- function (object, parm=NULL, level = 0.95, lhs=NULL, type=NULL, ...) { - is_multi <- length(object$lhs)>1 - - if(is_multi & is.null(lhs)) { +confint.felm <- function(object, parm = NULL, level = 0.95, lhs = NULL, type = NULL, ...) { + is_multi <- length(object$lhs) > 1 + + if (is_multi & is.null(lhs)) { lhs <- object$lhs - res_list <- lapply(object$lhs, function(x) confint_one(object, lhs=x, parm=parm, level = level, type = type, ...)) + res_list <- lapply(object$lhs, function(x) confint_one(object, lhs = x, parm = parm, level = level, type = type, ...)) res <- do.call("rbind", res_list) - rownames(res) <- paste(rep(lhs, each = object$Pp), rownames(res), sep=":") + rownames(res) <- paste(rep(lhs, each = object$Pp), rownames(res), sep = ":") } else { - res <- confint_one(object, parm=parm, level = level, lhs=lhs, type = type, ...) + res <- confint_one(object, parm = parm, level = level, lhs = lhs, type = type, ...) } res } ## usecopy/paste stats:::format.perc as would be forbidden to import unexported one -stats_format_perc <- function (probs, digits) paste(format(100 * probs, trim = TRUE, scientific = FALSE, digits = digits), "%") +stats_format_perc <- function(probs, digits) paste(format(100 * probs, trim = TRUE, scientific = FALSE, digits = digits), "%") # low level function for confint, working for only one lhs -confint_one <- function (object, parm=NULL, level = 0.95, lhs=NULL, type=NULL, ...) -{ - +confint_one <- function(object, parm = NULL, level = 0.95, lhs = NULL, type = NULL, ...) 
{ cf <- coef(object, lhs = lhs) - if(is.matrix(cf)) { + if (is.matrix(cf)) { cf <- setNames(drop(cf), rownames(cf)) ## drop removes name if 1,1 matrix! } pnames <- names(cf) - if (is.null(parm)) + if (is.null(parm)) { parm <- pnames - else if (is.numeric(parm)) + } else if (is.numeric(parm)) { parm <- pnames[parm] - a <- (1 - level)/2 + } + a <- (1 - level) / 2 a <- c(a, 1 - a) pct <- stats_format_perc(a, 3) fac <- qt(a, object$df.residual) - ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm, - pct)) + ci <- array(NA, dim = c(length(parm), 2L), dimnames = list( + parm, + pct + )) ses <- sqrt(diag(vcov(object, lhs = lhs, type = type)))[parm] ci[] <- cf[parm] + ses %o% fac ci @@ -184,32 +211,35 @@ confint_one <- function (object, parm=NULL, level = 0.95, lhs=NULL, type=NULL, . #' @method update felm #' @export -update.felm <- function (object, formula., ..., evaluate = TRUE) -{ - if (is.null(call <- getCall(object))) - stop("need an object with call component") - - extras <- match.call(expand.dots = FALSE)$... - if (!missing(formula.)) - call$formula <- formula(update(as.Formula(call$formula), formula.)) - if (length(extras)) { - existing <- !is.na(match(names(extras), names(call))) - for (a in names(extras)[existing]) call[[a]] <- extras[[a]] - if (any(!existing)) { - call <- c(as.list(call), extras[!existing]) - call <- as.call(call) - } +update.felm <- function(object, formula., ..., evaluate = TRUE) { + if (is.null(call <- getCall(object))) { + stop("need an object with call component") + } + + extras <- match.call(expand.dots = FALSE)$... 
+ if (!missing(formula.)) { + call$formula <- formula(update(as.Formula(call$formula), formula.)) + } + if (length(extras)) { + existing <- !is.na(match(names(extras), names(call))) + for (a in names(extras)[existing]) call[[a]] <- extras[[a]] + if (any(!existing)) { + call <- c(as.list(call), extras[!existing]) + call <- as.call(call) } - if (evaluate) - eval(call, parent.frame()) - else call + } + if (evaluate) { + eval(call, parent.frame()) + } else { + call + } } #' @method estfun felm #' @export estfun.felm <- function(x, ...) { - cl <- match.call(expand.dots=FALSE) - do.call(utils::getS3method('estfun','lm'), as.list(cl)[-1]) + cl <- match.call(expand.dots = FALSE) + do.call(utils::getS3method("estfun", "lm"), as.list(cl)[-1]) } #' @method bread felm @@ -223,93 +253,112 @@ bread.felm <- function(x, ...) { #' @method weights felm #' @export -weights.felm <- function(object,...) if(is.null(object$weights)) NULL else object$weights^2 +weights.felm <- function(object, ...) if (is.null(object$weights)) NULL else object$weights^2 #' @method xtable summary.felm #' @export -xtable.summary.felm <- function(x, caption=NULL, label=NULL, align=NULL, digits=NULL, - display=NULL, ...) { - cl <- match.call(expand.dots=FALSE) - do.call(utils::getS3method('xtable','summary.lm'), as.list(cl)[-1]) +xtable.summary.felm <- function(x, caption = NULL, label = NULL, align = NULL, digits = NULL, + display = NULL, ...) { + cl <- match.call(expand.dots = FALSE) + do.call(utils::getS3method("xtable", "summary.lm"), as.list(cl)[-1]) } #' @method xtable felm #' @export -xtable.felm <- function(x, caption=NULL, label=NULL, align=NULL, digits=NULL, - display=NULL, ...) { - cl <- match.call(expand.dots=FALSE) - do.call(utils::getS3method('xtable','lm'), as.list(cl)[-1]) +xtable.felm <- function(x, caption = NULL, label = NULL, align = NULL, digits = NULL, + display = NULL, ...) 
{ + cl <- match.call(expand.dots = FALSE) + do.call(utils::getS3method("xtable", "lm"), as.list(cl)[-1]) } #' @method print summary.felm #' @export -print.summary.felm <- function(x,digits=max(3L,getOption('digits')-3L),...) { - if(!is.null(x$lhs)) - cat('Summary for outcome',x$lhs,'\n') - cat('\nCall:\n ',deparse(x$call),'\n\n') +print.summary.felm <- function(x, digits = max(3L, getOption("digits") - 3L), ...) { + if (!is.null(x$lhs)) { + cat("Summary for outcome", x$lhs, "\n") + } + cat("\nCall:\n ", deparse(x$call), "\n\n") - qres <- zapsmall(quantile(x$residuals), digits+1L) - if(!is.null(x$weights)) cat('Weighted '); cat('Residuals:\n') + qres <- zapsmall(quantile(x$residuals), digits + 1L) + if (!is.null(x$weights)) cat("Weighted ") + cat("Residuals:\n") names(qres) <- c("Min", "1Q", "Median", "3Q", "Max") - print(qres,digits=digits,...) + print(qres, digits = digits, ...) - cat('\nCoefficients:\n') - if(x$Pp <= 0) + cat("\nCoefficients:\n") + if (x$Pp <= 0) { cat("(No coefficients)\n") - else { - printCoefmat(x$coefficients,digits=digits) - cat('\nResidual standard error:',format(signif(x$rse,digits)),'on',x$rdf,'degrees of freedom\n') - if (nzchar(mess <- naprint(x$na.action))) - cat(" (", mess, ")\n", sep = "") - cat('Multiple R-squared(full model):',formatC(x$r2,digits=digits),' Adjusted R-squared:', - formatC(x$r2adj,digits=digits),'\n') - if(!is.null(x$P.r.squared)) { - cat('Multiple R-squared(proj model):',formatC(x$P.r.squared,digits=digits), - ' Adjusted R-squared:', formatC(x$P.adj.r.squared,digits=digits),'\n') + } else { + printCoefmat(x$coefficients, digits = digits) + cat("\nResidual standard error:", format(signif(x$rse, digits)), "on", x$rdf, "degrees of freedom\n") + if (nzchar(mess <- naprint(x$na.action))) { + cat(" (", mess, ")\n", sep = "") + } + cat( + "Multiple R-squared(full model):", formatC(x$r2, digits = digits), " Adjusted R-squared:", + formatC(x$r2adj, digits = digits), "\n" + ) + if (!is.null(x$P.r.squared)) { + cat( + 
"Multiple R-squared(proj model):", formatC(x$P.r.squared, digits = digits), + " Adjusted R-squared:", formatC(x$P.adj.r.squared, digits = digits), "\n" + ) } - if(x$badF) - cat('F-statistic(full model, *iid*):') - else - cat('F-statistic(full model):') - hasicpt <- if(is.null(x$hasicpt)) 0 else 1 - - if(is.null(x$F.fstat)) { - cat(formatC(x$fstat,digits=digits),'on',x$p-hasicpt,'and',x$rdf, - 'DF, p-value:',format.pval(x$pval,digits=digits),'\n') + if (x$badF) { + cat("F-statistic(full model, *iid*):") } else { - cat(formatC(x$F.fstat['F'],digits=digits),'on',x$F.fstat['df1'], - 'and',x$F.fstat['df2'], 'DF, p-value:', - format.pval(x$F.fstat['p'],digits=digits),'\n') + cat("F-statistic(full model):") } - cat('F-statistic(proj model):',formatC(x$P.fstat[['F']],digits=digits),'on', - x$P.fstat[['df1']], 'and',x$P.fstat[['df2']], 'DF, p-value:', - format.pval(x$P.fstat[['p.F']],digits=digits),'\n') + hasicpt <- if (is.null(x$hasicpt)) 0 else 1 - if(!is.null(x$iv1fstat)) { + if (is.null(x$F.fstat)) { + cat( + formatC(x$fstat, digits = digits), "on", x$p - hasicpt, "and", x$rdf, + "DF, p-value:", format.pval(x$pval, digits = digits), "\n" + ) + } else { + cat( + formatC(x$F.fstat["F"], digits = digits), "on", x$F.fstat["df1"], + "and", x$F.fstat["df2"], "DF, p-value:", + format.pval(x$F.fstat["p"], digits = digits), "\n" + ) + } + cat( + "F-statistic(proj model):", formatC(x$P.fstat[["F"]], digits = digits), "on", + x$P.fstat[["df1"]], "and", x$P.fstat[["df2"]], "DF, p-value:", + format.pval(x$P.fstat[["p.F"]], digits = digits), "\n" + ) + + if (!is.null(x$iv1fstat)) { if1 <- x$iv1fstat - cat('F-statistic(excl instr.):') - - cat(formatC(if1[['F']],digits=digits),'on', - if1[['df1']],'and',if1[['df2']],'DF, p-value:', - format.pval(if1[['p.F']],digits=digits),'\n') + cat("F-statistic(excl instr.):") + + cat( + formatC(if1[["F"]], digits = digits), "on", + if1[["df1"]], "and", if1[["df2"]], "DF, p-value:", + format.pval(if1[["p.F"]], digits = digits), "\n" + ) } - - 
if(!is.null(x$E.fstat)) { - if1 <- x$E.fstat - cat('F-statistic(endog. vars):') - cat(formatC(if1[['F']],digits=digits),'on', - if1[['df1']],'and',if1[['df2']],'DF, p-value:', - format.pval(if1[['p.F']],digits=digits),'\n') + if (!is.null(x$E.fstat)) { + if1 <- x$E.fstat + cat("F-statistic(endog. vars):") + cat( + formatC(if1[["F"]], digits = digits), "on", + if1[["df1"]], "and", if1[["df2"]], "DF, p-value:", + format.pval(if1[["p.F"]], digits = digits), "\n" + ) } - if(length(x$fe) > 2 && !identical(x$exactDOF,'rM') && !x$exactDOF) - cat('*** Standard errors may be too high due to more than 2 groups and exactDOF=FALSE\n') + if (length(x$fe) > 2 && !identical(x$exactDOF, "rM") && !x$exactDOF) { + cat("*** Standard errors may be too high due to more than 2 groups and exactDOF=FALSE\n") + } } - if(!is.null(x$numctrl) && x$numctrl != 0) - cat(x$numctrl, 'variable(s) were projected out\n') - cat('\n\n') + if (!is.null(x$numctrl) && x$numctrl != 0) { + cat(x$numctrl, "variable(s) were projected out\n") + } + cat("\n\n") } @@ -318,64 +367,75 @@ print.summary.felm <- function(x,digits=max(3L,getOption('digits')-3L),...) { #' @inheritParams stats::model.frame #' @export model.frame.felm <- function(formula, ...) { - if(is.call(formula$model)) - eval(formula$model) - else - formula$model + if (is.call(formula$model)) { + eval(formula$model) + } else { + formula$model + } } #' @method model.matrix felm #' @inheritParams stats::model.matrix #' @export -model.matrix.felm <- function(object, centred=TRUE, ...) { - if(is.na(centred)) cent <- nocent <- TRUE - else if(centred) {cent <- TRUE; nocent <- FALSE} - else {cent <- FALSE; nocent <- TRUE} - - if((cent && is.null(object$cX) && is.null(object$X)) || (nocent && is.null(object$X))) { - F <- as.Formula(object$call[['formula']]) +model.matrix.felm <- function(object, centred = TRUE, ...) 
{ + if (is.na(centred)) { + cent <- nocent <- TRUE + } else if (centred) { + cent <- TRUE + nocent <- FALSE + } else { + cent <- FALSE + nocent <- TRUE + } + + if ((cent && is.null(object$cX) && is.null(object$X)) || (nocent && is.null(object$X))) { + F <- as.Formula(object$call[["formula"]]) len <- length(F) - f1 <- formula(F,lhs=0,rhs=1) + f1 <- formula(F, lhs = 0, rhs = 1) mf <- model.frame(object) - x <- model.matrix(f1,mf) - if(!object$hasicpt) x <- delete.icpt(x) + x <- model.matrix(f1, mf) + if (!object$hasicpt) x <- delete.icpt(x) - if(len[[2]] >= 5) { - f5 <- formula(F,lhs=0,rhs=5) + if (len[[2]] >= 5) { + f5 <- formula(F, lhs = 0, rhs = 5) # project out the control variables in f5 - x <- newols(list(y=x, x=delete.icpt(model.matrix(f5,mf)), weights=object$weights), nostats=TRUE) - } - } else - x <- object$X - - if(cent) { - if(is.null(object$cX)) - cX <- demeanlist(x,object$fe,weights=object$weights) - else - cX <- object$cX + x <- newols(list(y = x, x = delete.icpt(model.matrix(f5, mf)), weights = object$weights), nostats = TRUE) } - if(nocent) return(structure(x,cX=if(cent) cX else NULL)) + } else { + x <- object$X + } + + if (cent) { + if (is.null(object$cX)) { + cX <- demeanlist(x, object$fe, weights = object$weights) + } else { + cX <- object$cX + } + } + if (nocent) { + return(structure(x, cX = if (cent) cX else NULL)) + } return(cX) } #' Summarize felm model fits -#' -#' \code{summary} method for class \code{"felm"}. -#' -#' +#' +#' `summary` method for class `"felm"`. +#' +#' #' @method summary felm -#' @param object an object of class \code{"felm"}, a result of a call to -#' \code{felm}. +#' @param object an object of class `"felm"`, a result of a call to +#' `felm`. #' @param ... further arguments passed to or from other methods. #' @param robust logical. Use robust standard errors. See notes. #' @param lhs character. If multiple left hand sides, specify the name of the #' one to obtain a summary for. 
-#' @return The function \code{summary.felm} returns an object of \code{class} -#' \code{"summary.felm"}. It is quite similar to en \code{"summary.lm"} +#' @return The function `summary.felm` returns an object of `class` +#' `"summary.felm"`. It is quite similar to an `"summary.lm"` #' object, but not entirely compatible. -#' -#' The \code{"summary.felm"} object is a list containing the following fields: -#' +#' +#' The `"summary.felm"` object is a list containing the following fields: +#' #' \item{residuals}{a numerical vector. The residuals of the full system, with #' dummies.} #' \item{p}{an integer. The total number of coefficients, including
If the \code{robust} parameter is -#' \code{TRUE}, clustered standard errors are reported if a cluster was -#' specified in the call to \code{felm}; if not, heteroskedastic robust +#' +#' If the `robust` parameter is `FALSE`, the returned object will +#' contain ordinary standard errors. If the `robust` parameter is +#' `TRUE`, clustered standard errors are reported if a cluster was +#' specified in the call to `felm`; if not, heteroskedastic robust #' standard errors are reported. -#' -#' Several F-statistics reported. The \code{P.fstat} is for the projected -#' system. I.e. a joint test on whether all the \code{Pp} coefficients in -#' \code{coefficients} are zero. Then there are \code{fstat} and \code{pval} +#' +#' Several F-statistics reported. The `P.fstat` is for the projected +#' system. I.e. a joint test on whether all the `Pp` coefficients in +#' `coefficients` are zero. Then there are `fstat` and `pval` #' which is a test on all the coefficients including the ones projected out, #' except for an intercept. This statistic assumes i.i.d. errors and is not #' reliable for robust or clustered data. -#' +#' #' For a 1st stage IV-regression, an F-statistic against the model with #' excluded instruments is also computed. 
-#' @seealso \code{\link{waldtest}} +#' @seealso [waldtest()] #' @export -summary.felm <- function(object,...,robust=!is.null(object$clustervar)||getOption('lfe.robust'), - lhs=NULL) { +summary.felm <- function(object, ..., robust = !is.null(object$clustervar) || getOption("lfe.robust"), + lhs = NULL) { z <- object - if(z$nostats) stop('No summary for objects created with felm(nostats=TRUE)') - if(is.null(lhs)) { - if(length(z$lhs) > 1) - stop('Please specify lhs=[one of ',paste(z$lhs,collapse=','),']') + if (z$nostats) stop("No summary for objects created with felm(nostats=TRUE)") + if (is.null(lhs)) { + if (length(z$lhs) > 1) { + stop("Please specify lhs=[one of ", paste(z$lhs, collapse = ","), "]") + } STATS <- z lhs <- object$lhs - if(is.null(lhs)) lhs <- colnames(object$residuals)[1] + if (is.null(lhs)) lhs <- colnames(object$residuals)[1] } else { - if(is.na(match(lhs, z$lhs))) - stop('Please specify lhs=[one of ',paste(z$lhs,collapse=','),']') - if(length(z$lhs) >= 1) - STATS <- z$STATS[[lhs]] - else - STATS <- z + if (is.na(match(lhs, z$lhs))) { + stop("Please specify lhs=[one of ", paste(z$lhs, collapse = ","), "]") + } + if (length(z$lhs) >= 1) { + STATS <- z$STATS[[lhs]] + } else { + STATS <- z + } } res <- list() - if(is.null(z$weights)) w <- 1.0 else w <- z$weights + if (is.null(z$weights)) w <- 1.0 else w <- z$weights w2 <- w^2 res$weights <- z$weights res$p <- z$p res$Pp <- z$Pp res$numctrl <- z$numctrl res$ctrlnames <- z$ctrlnames - if(length(z$lhs) > 1) res$lhs <- lhs - if(res$Pp == 0) { - res <- list(residuals=as.vector(w*z$residuals[,lhs]),p=z$p,Pp=0,call=z$call) - class(res) <- 'summary.felm' + if (length(z$lhs) > 1) res$lhs <- lhs + if (res$Pp == 0) { + res <- list(residuals = as.vector(w * z$residuals[, lhs]), p = z$p, Pp = 0, call = z$call) + class(res) <- "summary.felm" return(res) } res$terms <- z$terms res$call <- z$call res$badF <- FALSE - if(is.logical(robust) && robust) { - if(!is.null(STATS$cse)) { - coefficients <- 
cbind(z$beta[,lhs],STATS$cse,STATS$ctval,STATS$cpval) - sdnam <- 'Cluster s.e.' + if (is.logical(robust) && robust) { + if (!is.null(STATS$cse)) { + coefficients <- cbind(z$beta[, lhs], STATS$cse, STATS$ctval, STATS$cpval) + sdnam <- "Cluster s.e." res$badF <- TRUE } else { - coefficients <- cbind(z$beta[,lhs],STATS$rse,STATS$rtval,STATS$rpval) - sdnam <- 'Robust s.e' + coefficients <- cbind(z$beta[, lhs], STATS$rse, STATS$rtval, STATS$rpval) + sdnam <- "Robust s.e" res$badF <- TRUE } } else { - sdnam <- 'Std. Error' - coefficients <- cbind(z$beta[,lhs],STATS$se,STATS$tval,STATS$pval) + sdnam <- "Std. Error" + coefficients <- cbind(z$beta[, lhs], STATS$se, STATS$tval, STATS$pval) } - if(!is.null(coefficients)) { - dimnames(coefficients) <- - list(rownames(z$beta), - c('Estimate',sdnam,'t value','Pr(>|t|)')) - + if (!is.null(coefficients)) { + dimnames(coefficients) <- + list( + rownames(z$beta), + c("Estimate", sdnam, "t value", "Pr(>|t|)") + ) } res$coefficients <- coefficients - res$residuals <- as.vector(w*z$residuals[,lhs]) + res$residuals <- as.vector(w * z$residuals[, lhs]) - qres <- quantile(res$residuals,na.rm=TRUE) + qres <- quantile(res$residuals, na.rm = TRUE) names(qres) <- c("Min", "1Q", "Median", "3Q", "Max") res$qres <- qres @@ -494,90 +558,93 @@ summary.felm <- function(object,...,robust=!is.null(object$clustervar)||getOptio # fstat, p-value p <- z$p - if(is.null(z$hasicpt)) hasicpt <- 0 else hasicpt <- z$hasicpt - if(length(z$fe) > 0) hasicpt <- 1 + if (is.null(z$hasicpt)) hasicpt <- 0 else hasicpt <- z$hasicpt + if (length(z$fe) > 0) hasicpt <- 1 res$hasicpt <- hasicpt rdf <- z$N - p -# rss <- sum(z$residuals[,lhs]^2) + # rss <- sum(z$residuals[,lhs]^2) rss <- sum(res$residuals^2) - if(!is.null(z$TSS)) - tss <- z$TSS[[lhs]] - else - tss <- sum( w2 * (z$response[,lhs] - mean(z$response[,lhs]))^2) + if (!is.null(z$TSS)) { + tss <- z$TSS[[lhs]] + } else { + tss <- sum(w2 * (z$response[, lhs] - mean(z$response[, lhs]))^2) + } mss <- tss - rss -# mss2 
<- if(hasicpt) sum((z$fitted.values[,lhs]-mean(z$fitted.values[,lhs]))^2) else sum(z$fitted.values[,lhs]^2) -# tss <- mss + rss + # mss2 <- if(hasicpt) sum((z$fitted.values[,lhs]-mean(z$fitted.values[,lhs]))^2) else sum(z$fitted.values[,lhs]^2) + # tss <- mss + rss + + resvar <- rss / rdf + r2 <- mss / tss + r2adj <- 1 - (1 - r2) * ((z$N - hasicpt) / rdf) - resvar <- rss/rdf - r2 <- mss/tss - r2adj <- 1-(1-r2)*((z$N-hasicpt)/rdf) - -# F-tests for 2. stage iv is different - if(!is.null(z$iv.residuals)) { + # F-tests for 2. stage iv is different + if (!is.null(z$iv.residuals)) { # We have F = (tss - rss)/rss (and some df factor) # however, the numerator should be residuals w.r.t. to the # fitted variables whereas the denominator should be w.r.t. to # the original variables. (Wooldridge, p. 99) # every metric verified to match Stata ivregress with small sample adjustment Jan 12, 2015 - mss <- tss - sum(w2*z$iv.residuals[,lhs]^2) + mss <- tss - sum(w2 * z$iv.residuals[, lhs]^2) } # hmm, fstat should be computed differently when s.e. are robust or clustered. -# full model Fstat for i.i.d - F <- as.numeric((mss/(p-hasicpt))/resvar) # get rid of name - res$F.fstat <- c(F=F, - df1=p-hasicpt, - df2=rdf, - p=pf(F,p-hasicpt,rdf,lower.tail=FALSE)) + # full model Fstat for i.i.d + F <- as.numeric((mss / (p - hasicpt)) / resvar) # get rid of name + res$F.fstat <- c( + F = F, + df1 = p - hasicpt, + df2 = rdf, + p = pf(F, p - hasicpt, rdf, lower.tail = FALSE) + ) res$fstat <- F - res$pval <- res$F.fstat['p'] + res$pval <- res$F.fstat["p"] # projected R2? I.e. how much is explained by the variables that # have not been projected out? That's similar to the projected F-test. 
# So the tss is not the tss of the response variable, but of the # projected response (P.response doesn't exist in the old felm) - if(!is.null(z$P.TSS)) { + if (!is.null(z$P.TSS)) { tss <- z$P.TSS[lhs] mss <- tss - rss - res$P.r.squared <- mss/tss - res$P.adj.r.squared <- 1-(1-res$P.r.squared)*((z$N-hasicpt)/rdf) + res$P.r.squared <- mss / tss + res$P.adj.r.squared <- 1 - (1 - res$P.r.squared) * ((z$N - hasicpt) / rdf) } - + # use wald test if robust or clustered mcoef <- rownames(z$coefficients) - mcoef <- mcoef[!(mcoef %in% '(Intercept)')] + mcoef <- mcoef[!(mcoef %in% "(Intercept)")] - wtype <- 'iid' - if(robust) wtype <- if(is.null(z$clustervar)) 'robust' else 'cluster' - F <- try(waldtest(z,mcoef, type=wtype, lhs=lhs)) - if(inherits(F,'try-error')) { + wtype <- "iid" + if (robust) wtype <- if (is.null(z$clustervar)) "robust" else "cluster" + F <- try(waldtest(z, mcoef, type = wtype, lhs = lhs)) + if (inherits(F, "try-error")) { warning("can't compute cluster F-test") - F <- list(F=NaN,p.F=NaN) + F <- list(F = NaN, p.F = NaN) } res$P.fstat <- F # then a Wald test on the endogenous variables - if(!is.null(z$iv.residuals)) { - res$E.fstat <- waldtest(z, 'endovars', type=wtype, lhs=lhs) + if (!is.null(z$iv.residuals)) { + res$E.fstat <- waldtest(z, "endovars", type = wtype, lhs = lhs) } - sigma <- sqrt(resvar) + sigma <- sqrt(resvar) res$exactDOF <- z$exactDOF - if(is.list(z$iv1fstat)) { + if (is.list(z$iv1fstat)) { res$iv1fstat <- z$iv1fstat[[lhs]] res$rob.iv1fstat <- z$rob.iv1fstat[[lhs]] } else { res$iv1fstat <- z$iv1fstat res$rob.iv1fstat <- z$rob.iv1fstat } - res$df <- c(rdf,rdf) + res$df <- c(rdf, rdf) res$sigma <- res$rse <- sigma res$rdf <- rdf res$r.squared <- res$r2 <- r2 @@ -585,7 +652,6 @@ summary.felm <- function(object,...,robust=!is.null(object$clustervar)||getOptio res$fe <- z$fe res$N <- z$N res$na.action <- z$na.action - class(res) <- 'summary.felm' + class(res) <- "summary.felm" res } - diff --git a/R/getfe.R b/R/getfe.R index c482849..8945822 
100644 --- a/R/getfe.R +++ b/R/getfe.R @@ -1,171 +1,179 @@ # return a data-frame with the group fixed effects, including zeros for references #' Retrieve the group fixed effects -#' +#' #' Compute the group fixed effects, i.e. the dummy parameters, which were swept -#' out during an estimation with \code{\link{felm}}. -#' +#' out during an estimation with [felm()]. +#' #' For the case with two factors (the terms in the second part of the formula -#' supplied to \code{\link{felm}}), one reference in each connected component +#' supplied to [felm()]), one reference in each connected component #' is adequate when interpreting the results. -#' +#' #' For three or more factors, no such easy method is known; for the -#' \code{"cholesky"} method- reference levels are found by analyzing the +#' `"cholesky"` method- reference levels are found by analyzing the #' pivoted Cholesky-decomposition of a slightly perturbed system. The -#' \code{"kaczmarz"} method provides no rank-deficiency analysis, it is assumed +#' `"kaczmarz"` method provides no rank-deficiency analysis, it is assumed #' that the factors beyond the two first contribute nothing to the #' rank-deficiency, so one reference in each is used. -#' +#' #' If there are more than two factors, only the first two will be used to #' report connected components. In this case, it is not known which graph #' theoretic concept may be used to analyze the rank-deficiency. -#' +#' #' The standard errors returned by the Kaczmarz-method are bootstrapped, -#' keeping the other coefficients (from \code{\link{felm}}) constant, i.e. they -#' are from the variance when resampling the residuals. If \code{robust=TRUE}, -#' heteroskedastic robust standard errors are estimated. If \code{robust=FALSE} -#' and \code{cluster=TRUE}, clustered standard errors with the cluster -#' specified to \code{felm()} are estimated. If \code{cluster} is a factor, it +#' keeping the other coefficients (from [felm()]) constant, i.e. 
they +#' are from the variance when resampling the residuals. If `robust=TRUE`, +#' heteroskedastic robust standard errors are estimated. If `robust=FALSE` +#' and `cluster=TRUE`, clustered standard errors with the cluster +#' specified to `felm()` are estimated. If `cluster` is a factor, it #' is used for the cluster definition. -#' -#' @param obj object of class \code{"felm"}, usually, a result of a call to -#' \code{\link{felm}} +#' +#' @param obj object of class `"felm"`, usually, a result of a call to +#' [felm()] #' @param references a vector of strings. If there are more than two factors #' and you have prior knowledge of what the reference levels should be like -#' \code{references='id.23'}. Not used with \code{method='kaczmarz'} +#' `references='id.23'`. Not used with `method='kaczmarz'` #' @param se logical. Set to TRUE if standard errors for the group effects are -#' wanted. This is \strong{very} time-consuming for large problems, so leave +#' wanted. This is **very** time-consuming for large problems, so leave #' it as FALSE unless absolutely needed. #' @param method character string. Either 'cholesky', 'cg', or the default #' 'kaczmarz'. The latter is often very fast and consumes little memory, it -#' requires an estimable function to be specified, see \code{\link{efactory}}. +#' requires an estimable function to be specified, see [efactory()]. #' The 'cholesky' method is no longer maintained as the author sees no use for #' it. #' @param ef function. A function of two variables, a vector of group fixed -#' effects and a logical, i.e. \code{function(v,addnames)}. This function -#' should be estimable and is used to transform the raw-coefficients \code{v} +#' effects and a logical, i.e. `function(v,addnames)`. This function +#' should be estimable and is used to transform the raw-coefficients `v` #' from the kaczmarz-method. 
The second variable indicates whether the #' function must return a named vector (if this is FALSE, one may skip the #' names, saving memory allocations and time). -#' -#' If a string is specified, it is fed to the \code{\link{efactory}}-function. +#' +#' If a string is specified, it is fed to the [efactory()]-function. #' The default function is one which picks one reference in each component. -#' -#' Can be set to \code{ef="ln"} to yield the minimal-norm solution from the +#' +#' Can be set to `ef="ln"` to yield the minimal-norm solution from the #' kaczmarz-method. -#' -#' It can also be set to \code{ef="zm"} to get zero means (and intercept) in +#' +#' It can also be set to `ef="zm"` to get zero means (and intercept) in #' one of the factors, and a reference in the other. #' @param bN integer. The number of bootstrap runs when standard errors are #' requested. #' @param robust logical. Should heteroskedastic standard errors be estimated? #' @param cluster logical or factor. Estimate clustered standard errors. -#' @param lhs character vector. Specify which left hand side if \code{obj} has +#' @param lhs character vector. Specify which left hand side if `obj` has #' multiple lhs. -#' @return The function \code{getfe} computes and returns a data frame +#' @return The function `getfe` computes and returns a data frame #' containing the group fixed effects. It has the columns -#' \code{c('effect','se','obs','comp','fe','idx')} -#' -#' \itemize{ \item \code{effect} is the estimated effect. \item \code{se} is -#' the standard error. \item \code{obs} is the number of observations of this -#' level. \item \code{comp} is the graph-theoretic component number, useful -#' for interpreting the effects. \item \code{fe} is the name of factor. \item -#' \code{idx} is the level of the factor. } -#' +#' `c('effect','se','obs','comp','fe','idx')` +#' +#' \itemize{ \item `effect` is the estimated effect. \item `se` is +#' the standard error. 
\item `obs` is the number of observations of this +#' level. \item `comp` is the graph-theoretic component number, useful +#' for interpreting the effects. \item `fe` is the name of factor. \item +#' `idx` is the level of the factor. } +#' #' With the Kaczmarz-method it's possible to specify a different estimable #' function. #' @keywords regression models #' @examples -#' -#' oldopts <- options(lfe.threads=2) +#' +#' oldopts <- options("lfe.threads") +#' options(lfe.threads = 2) #' ## create covariates #' x <- rnorm(4000) #' x2 <- rnorm(length(x)) -#' +#' #' ## create individual and firm -#' id <- factor(sample(500,length(x),replace=TRUE)) -#' firm <- factor(sample(300,length(x),replace=TRUE)) -#' +#' id <- factor(sample(500, length(x), replace = TRUE)) +#' firm <- factor(sample(300, length(x), replace = TRUE)) +#' #' ## effects #' id.eff <- rlnorm(nlevels(id)) #' firm.eff <- rexp(nlevels(firm)) -#' +#' #' ## left hand side -#' y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) -#' +#' y <- x + 0.25 * x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) +#' #' ## estimate and print result -#' est <- felm(y ~ x+x2 | id + firm) +#' est <- felm(y ~ x + x2 | id + firm) #' summary(est) #' ## extract the group effects -#' alpha <- getfe(est,se=TRUE) -#' +#' alpha <- getfe(est, se = TRUE) +#' #' ## find some estimable functions, with standard errors, we don't get #' ## names so we must precompute some numerical indices in ef -#' idx <- match(c('id.5','id.6','firm.11','firm.12'),rownames(alpha)) -#' alpha[idx,] -#' ef <- function(v,addnames) { -#' w <- c(v[idx[[2]]]-v[idx[[1]]],v[idx[[4]]]+v[idx[[1]]], -#' v[idx[[4]]]-v[idx[[3]]]) -#' if(addnames) names(w) <-c('id6-id5','f12+id5','f12-f11') +#' idx <- match(c("id.5", "id.6", "firm.11", "firm.12"), rownames(alpha)) +#' alpha[idx, ] +#' ef <- function(v, addnames) { +#' w <- c( +#' v[idx[[2]]] - v[idx[[1]]], v[idx[[4]]] + v[idx[[1]]], +#' v[idx[[4]]] - v[idx[[3]]] +#' ) +#' if (addnames) names(w) <- 
c("id6-id5", "f12+id5", "f12-f11") #' w #' } -#' getfe(est,ef=ef,se=TRUE) +#' getfe(est, ef = ef, se = TRUE) #' options(oldopts) #' \dontrun{ -#' summary(lm(y ~ x+x2+id+firm-1)) +#' summary(lm(y ~ x + x2 + id + firm - 1)) #' } -#' +#' #' @export getfe -getfe <- function(obj,references=NULL,se=FALSE,method='kaczmarz',ef='ref',bN=100, robust=FALSE, cluster=obj[['clustervar']], lhs=NULL) { - - if(length(obj$fe) == 0) return(NULL) - if(!is.null(obj$numctrl) && obj$numctrl > 0) - stop("Can't retrieve fixed effects when estimating with control variables") - if(method == 'kaczmarz' || method == 'cg') { - if(!is.null(references)) - warning('use estimable function (ef) instead of references in the Kaczmarz method') - if(is.null(ef)) ef <- 'ln' - if(!is.character(ef) && !is.function(ef)) - stop('ef must be a function when using the Kaczmarz method') - if(method == 'cg') { - oldopt <- options(lfe.usecg=TRUE) +getfe <- function(obj, references = NULL, se = FALSE, method = "kaczmarz", ef = "ref", bN = 100, robust = FALSE, cluster = obj[["clustervar"]], lhs = NULL) { + if (length(obj$fe) == 0) { + return(NULL) + } + if (!is.null(obj$numctrl) && obj$numctrl > 0) { + stop("Can't retrieve fixed effects when estimating with control variables") + } + if (method == "kaczmarz" || method == "cg") { + if (!is.null(references)) { + warning("use estimable function (ef) instead of references in the Kaczmarz method") + } + if (is.null(ef)) ef <- "ln" + if (!is.character(ef) && !is.function(ef)) { + stop("ef must be a function when using the Kaczmarz method") + } + if (method == "cg") { + oldopt <- options("lfe.usecg") + options(lfe.usecg = TRUE) on.exit(options(oldopt)) } - return(getfe.kaczmarz(obj,se,ef=ef,bN=bN, robust=robust, cluster=cluster, lhs=lhs)) + return(getfe.kaczmarz(obj, se, ef = ef, bN = bN, robust = robust, cluster = cluster, lhs = lhs)) } - if(method != 'cholesky') stop('method must be either kaczmarz, cg, or cholesky') + if (method != "cholesky") stop("method must be either 
kaczmarz, cg, or cholesky") - .Deprecated('',msg="Cholesky method is deprecated. Please consider using either 'kaczmarz' or 'cg'") - attr(se,'sefactor') <- obj$sefactor - attr(obj$fe,'references') <- references + .Deprecated("", msg = "Cholesky method is deprecated. Please consider using either 'kaczmarz' or 'cg'") + attr(se, "sefactor") <- obj$sefactor + attr(obj$fe, "references") <- references R <- obj$r.residuals # then the remaining. This is usually sufficient. # we could also partition differently, just do the 'comp' adjustment accordingly # components ddlist <- makeddlist(obj$fe) gc() - orignm <- attr(ddlist,'nm') + orignm <- attr(ddlist, "nm") comp <- 1 res <- data.frame() - for(dd in ddlist) { -# res <- foreach(dd=ddlist,.combine=rbind,.init=data.frame()) %dopar% { - dummies <- attr(dd,'dummies') - keep <- attr(dd,'keep') - comp <- attr(dd,'comp') -# cat(date(),'comp dd',comp,'size',length(keep),'\n') + for (dd in ddlist) { + # res <- foreach(dd=ddlist,.combine=rbind,.init=data.frame()) %dopar% { + dummies <- attr(dd, "dummies") + keep <- attr(dd, "keep") + comp <- attr(dd, "comp") + # cat(date(),'comp dd',comp,'size',length(keep),'\n') Rhs <- as.vector(dummies %*% R[keep]) names(Rhs) <- colnames(dd) - alpha <- findfe(dd,Rhs,se) - alpha[,'comp'] <- comp - res <- rbind(res,alpha) -# alpha + alpha <- findfe(dd, Rhs, se) + alpha[, "comp"] <- comp + res <- rbind(res, alpha) + # alpha } - res <- res[orignm,] - res[,'comp'] <- factor(res[,'comp']) -# now, add factors telling which fe-group we're in -# the rownames are of the form . - fefact <- strsplit(rownames(res),'.',fixed=TRUE) - res[,'fe'] <- factor(unlist(lapply(fefact,function(l) l[[1]]))) - res[,'idx'] <- factor(unlist(lapply(fefact,function(l) paste(l[-1],collapse='.')))) + res <- res[orignm, ] + res[, "comp"] <- factor(res[, "comp"]) + # now, add factors telling which fe-group we're in + # the rownames are of the form . 
+ fefact <- strsplit(rownames(res), ".", fixed = TRUE) + res[, "fe"] <- factor(unlist(lapply(fefact, function(l) l[[1]]))) + res[, "idx"] <- factor(unlist(lapply(fefact, function(l) paste(l[-1], collapse = ".")))) return(res) } diff --git a/R/is.estimable.R b/R/is.estimable.R index be3de63..55df141 100644 --- a/R/is.estimable.R +++ b/R/is.estimable.R @@ -1,89 +1,95 @@ #' Verify estimability of function -#' -#' Verify that a function you have written for \code{\link{getfe}} is indeed +#' +#' Verify that a function you have written for [getfe()] is indeed #' estimable. -#' -#' When writing custom estimable functions for \code{\link{getfe}}, the -#' function \code{is.estimable} can be used to test it for estimability. -#' \code{is.estimable()} solves the sparse residual system with the Kaczmarz -#' method, using two different initial values. Then \code{ef()} is applied to -#' the two solutions. If the value of \code{ef()} differs by more than -#' \code{1e-5} in any coordinate, \code{FALSE} is returned, otherwise -#' \code{TRUE} is returned. If \code{keepdiff=TRUE}, the vector of differences -#' is attached as an attribute \code{'diff'} to the returned logical value. If +#' +#' When writing custom estimable functions for [getfe()], the +#' function `is.estimable` can be used to test it for estimability. +#' `is.estimable()` solves the sparse residual system with the Kaczmarz +#' method, using two different initial values. Then `ef()` is applied to +#' the two solutions. If the value of `ef()` differs by more than +#' `1e-5` in any coordinate, `FALSE` is returned, otherwise +#' `TRUE` is returned. If `keepdiff=TRUE`, the vector of differences +#' is attached as an attribute `'diff'` to the returned logical value. 
If #' you have problems with estimability, it is a fair guess that those entries -#' with a difference in absolute values smaller than, say, \code{1e-5} are +#' with a difference in absolute values smaller than, say, `1e-5` are #' estimable, whereas the others are not. -#' +#' #' @param ef function. The function to be verified. #' @param fe list of factors. -#' @param R numeric. Vector of residuals, if \code{NULL}, a random one is +#' @param R numeric. Vector of residuals, if `NULL`, a random one is #' created. -#' @param nowarn logical. Set to \code{TRUE} if \code{is.estimable} should not +#' @param nowarn logical. Set to `TRUE` if `is.estimable` should not #' throw a warning for non-estimable functions. #' @param keepdiff logical. Return differences between two different runs of #' the Kaczmarz method. #' @param threshold numeric. Threshold for determining estimability. #' @return Returns a logical. -#' @seealso \code{\link{getfe}} +#' @seealso [getfe()] #' @examples -#' -#' oldopts <- options(lfe.threads=1) +#' +#' oldopts <- options("lfe.threads") +#' options(lfe.threads = 2) #' ## create individual and firm -#' id <- factor(sample(5000,50000,replace=TRUE)) -#' firm <- factor(sample(3000,50000,replace=TRUE)) -#' +#' id <- factor(sample(5000, 50000, replace = TRUE)) +#' firm <- factor(sample(3000, 50000, replace = TRUE)) +#' #' ## create some estimable functions. 
It's faster to #' ## use numerical indices in ef rather than strings, and the input v #' ## to ef has no names, we have to add them when requested -#' ef <- function(v,addnames) { -#' w <- c(v[6]-v[5],v[7000]+v[5],v[7000]-v[6000]) -#' if(addnames) names(w) <-c('id6-id5','f2k+id5','f2k-f1k') +#' ef <- function(v, addnames) { +#' w <- c(v[6] - v[5], v[7000] + v[5], v[7000] - v[6000]) +#' if (addnames) names(w) <- c("id6-id5", "f2k+id5", "f2k-f1k") #' w #' } -#' is.estimable(ef,list(id=id,firm=firm)) -#' +#' is.estimable(ef, list(id = id, firm = firm)) +#' #' ## Then make an error; in the last coordinate, sum two firms -#' ef <- function(v,addnames) { -#' w <- c(v[6]-v[5],v[7000]+v[5],v[7000]+v[6000]) -#' if(addnames) names(w) <-c('id6-id5','f2k+id5','f2k-f1k') +#' ef <- function(v, addnames) { +#' w <- c(v[6] - v[5], v[7000] + v[5], v[7000] + v[6000]) +#' if (addnames) names(w) <- c("id6-id5", "f2k+id5", "f2k-f1k") #' w #' } -#' is.estimable(ef, list(id=id,firm=firm), keepdiff=TRUE) +#' is.estimable(ef, list(id = id, firm = firm), keepdiff = TRUE) #' options(oldopts) -#' +#' #' @export is.estimable -is.estimable <- function(ef,fe,R=NULL,nowarn=FALSE,keepdiff=FALSE, threshold=500*getOption('lfe.eps')) { - if(!is.function(ef)) stop('ef must be a function') - N <- sum(unlist(lapply(fe,function(f) { - x <- attr(f,'x', exact=TRUE) - if(is.matrix(x)) nlevels(f)*ncol(x) else nlevels(f) +is.estimable <- function(ef, fe, R = NULL, nowarn = FALSE, keepdiff = FALSE, threshold = 500 * getOption("lfe.eps")) { + if (!is.function(ef)) stop("ef must be a function") + N <- sum(unlist(lapply(fe, function(f) { + x <- attr(f, "x", exact = TRUE) + if (is.matrix(x)) nlevels(f) * ncol(x) else nlevels(f) }))) - if(is.null(R)) { + if (is.null(R)) { # make a suitable residual nr <- length(fe[[1]]) - vec <- unlist(lapply(fe,function(f) { - x <- attr(f,'x', exact=TRUE) - if(is.matrix(x)) return(unlist(apply(x,2,function(cl) cl*runif(nlevels(f))[f]))) + vec <- unlist(lapply(fe, function(f) { + x 
<- attr(f, "x", exact = TRUE) + if (is.matrix(x)) { + return(unlist(apply(x, 2, function(cl) cl * runif(nlevels(f))[f]))) + } r <- runif(nlevels(f))[f] - if(is.null(x)) r else unlist(r*x) + if (is.null(x)) r else unlist(r * x) })) - dim(vec) <- c(nr, length(vec)/nr) + dim(vec) <- c(nr, length(vec) / nr) R <- rowSums(vec) } - v1 <- ef(kaczmarz(fe,R,init=runif(N)),TRUE) - v2 <- ef(kaczmarz(fe,R,init=runif(N)),TRUE) - df <- max(abs(v1-v2)) - if(df > threshold) { - bad <- which.max(abs(v1-v2)) + v1 <- ef(kaczmarz(fe, R, init = runif(N)), TRUE) + v2 <- ef(kaczmarz(fe, R, init = runif(N)), TRUE) + df <- max(abs(v1 - v2)) + if (df > threshold) { + bad <- which.max(abs(v1 - v2)) badname <- names(bad) - if(!nowarn) - warning('non-estimable function, largest error ', - format(df,digits=1),' in coordinate ',bad, ' ("',badname,'")') - return(structure(FALSE,diff=if(!keepdiff) NULL else v1-v2)) + if (!nowarn) { + warning( + "non-estimable function, largest error ", + format(df, digits = 1), " in coordinate ", bad, ' ("', badname, '")' + ) + } + return(structure(FALSE, diff = if (!keepdiff) NULL else v1 - v2)) } - structure(TRUE,diff=if(!keepdiff) NULL else v1-v2) + structure(TRUE, diff = if (!keepdiff) NULL else v1 - v2) } diff --git a/R/kaczmarz.R b/R/kaczmarz.R index 6db332a..8e6fa55 100644 --- a/R/kaczmarz.R +++ b/R/kaczmarz.R @@ -1,129 +1,128 @@ #' Solve a linear system defined by factors -#' +#' #' Uses the Kaczmarz method to solve a system of the type Dx = R, where D is #' the matrix of dummies created from a list of factors. -#' -#' +#' +#' #' @param fl A list of arbitrary factors of the same length #' @param R numeric. A vector, matrix or list of such of the same length as #' the factors #' @param eps a tolerance for the method #' @param init numeric. A vector to use as initial value for the Kaczmarz #' iterations. The algorithm converges to the solution closest to this -#' @param threads integer. 
The number of threads to use when \code{R} is more +#' @param threads integer. The number of threads to use when `R` is more #' than one vector -#' @return A vector \code{x} of length equal to the sum of the number of levels -#' of the factors in \code{fl}, which solves the system \eqn{Dx=R}. If the +#' @return A vector `x` of length equal to the sum of the number of levels +#' of the factors in `fl`, which solves the system \eqn{Dx=R}. If the #' system is inconsistent, the algorithm may not converge, it will give a #' warning and return something which may or may not be close to a solution. By -#' setting \code{eps=0}, maximum accuracy (with convergence warning) will be +#' setting `eps=0`, maximum accuracy (with convergence warning) will be #' achieved. -#' @note This function is used by \code{\link{getfe}}, it's quite specialized, +#' @note This function is used by [getfe()], it's quite specialized, #' but it might be useful for other purposes too. -#' -#' In case of convergence problems, setting \code{options(lfe.usecg=TRUE)} will +#' +#' In case of convergence problems, setting `options(lfe.usecg=TRUE)` will #' cause the kaczmarz() function to dispatch to the more general conjugate -#' gradient method of \code{\link{cgsolve}}. This may or may not be faster. -#' @seealso \code{\link{cgsolve}} +#' gradient method of [cgsolve()]. This may or may not be faster. 
+#' @seealso [cgsolve()] #' @examples -#' +#' #' ## create factors -#' f1 <- factor(sample(24000,100000,replace=TRUE)) -#' f2 <- factor(sample(20000,length(f1),replace=TRUE)) -#' f3 <- factor(sample(10000,length(f1),replace=TRUE)) -#' f4 <- factor(sample(8000,length(f1),replace=TRUE)) +#' f1 <- factor(sample(24000, 100000, replace = TRUE)) +#' f2 <- factor(sample(20000, length(f1), replace = TRUE)) +#' f3 <- factor(sample(10000, length(f1), replace = TRUE)) +#' f4 <- factor(sample(8000, length(f1), replace = TRUE)) #' ## the matrix of dummies -#' D <- makeDmatrix(list(f1,f2,f3,f4)) -#' dim(D) +#' D <- makeDmatrix(list(f1, f2, f3, f4)) +#' dim(D) #' ## an x -#' truex <- runif(ncol(D)) +#' truex <- runif(ncol(D)) #' ## and the right hand side -#' R <- as.vector(D %*% truex) +#' R <- as.vector(D %*% truex) #' ## solve it -#' sol <- kaczmarz(list(f1,f2,f3,f4),R) +#' sol <- kaczmarz(list(f1, f2, f3, f4), R) #' ## verify that the solution solves the system Dx = R -#' sqrt(sum((D %*% sol - R)^2)) +#' sqrt(sum((D %*% sol - R)^2)) #' ## but the solution is not equal to the true x, because the system is #' ## underdetermined -#' sqrt(sum((sol - truex)^2)) +#' sqrt(sum((sol - truex)^2)) #' ## moreover, the solution from kaczmarz has smaller norm -#' sqrt(sum(sol^2)) < sqrt(sum(truex^2)) -#' +#' sqrt(sum(sol^2)) < sqrt(sum(truex^2)) +#' #' @export kaczmarz -kaczmarz <- function(fl,R,eps=getOption('lfe.eps'),init=NULL, - threads=getOption('lfe.threads')) { - - if(getOption('lfe.usecg')) { +kaczmarz <- function(fl, R, eps = getOption("lfe.eps"), init = NULL, + threads = getOption("lfe.threads")) { + if (getOption("lfe.usecg")) { mat <- makeDmatrix(fl) - if(is.list(R)) { + if (is.list(R)) { mm <- crossprod(mat) - return(lapply(R, function(ll) drop(cgsolve(mm, crossprod(mat, ll), eps=max(eps,1e-6), init=init)))) -# skel <- lapply(R,function(a) {if(is.matrix(a)) matrix(0,ncol(mat),ncol(a)) else rep(0,ncol(mat))}) -# return(utils::relist(cgsolve(crossprod(mat), 
crossprod(mat,Reduce(cbind,R)), eps=eps, init=init), skel)) + return(lapply(R, function(ll) drop(cgsolve(mm, crossprod(mat, ll), eps = max(eps, 1e-6), init = init)))) + # skel <- lapply(R,function(a) {if(is.matrix(a)) matrix(0,ncol(mat),ncol(a)) else rep(0,ncol(mat))}) + # return(utils::relist(cgsolve(crossprod(mat), crossprod(mat,Reduce(cbind,R)), eps=eps, init=init), skel)) } - return(drop(cgsolve(crossprod(mat), crossprod(mat,R), eps=eps, init=init))) + return(drop(cgsolve(crossprod(mat), crossprod(mat, R), eps = eps, init = init))) } - if(is.null(threads)) threads <- 1 + if (is.null(threads)) threads <- 1 islist <- is.list(R) - if(!islist) R <- list(R) - v <- .Call(C_kaczmarz,fl,R,eps,as.vector(init),as.integer(threads)) - if(!islist) { + if (!islist) R <- list(R) + v <- .Call(C_kaczmarz, fl, R, eps, as.vector(init), as.integer(threads)) + if (!islist) { v <- drop(v[[1]]) } v } -getfe.kaczmarz <- function(obj,se=FALSE,eps=getOption('lfe.eps'),ef='ref',bN=100, - robust=FALSE, cluster=NULL, lhs=NULL) { +getfe.kaczmarz <- function(obj, se = FALSE, eps = getOption("lfe.eps"), ef = "ref", bN = 100, + robust = FALSE, cluster = NULL, lhs = NULL) { inef <- ef - if(is.character(ef)) { - ef <- efactory(obj,opt=ef) + if (is.character(ef)) { + ef <- efactory(obj, opt = ef) } - if(!isTRUE(attr(ef,'verified')) && !is.estimable(ef, obj$fe)) { - warning('Supplied function seems non-estimable') + if (!isTRUE(attr(ef, "verified")) && !is.estimable(ef, obj$fe)) { + warning("Supplied function seems non-estimable") } multlhs <- length(obj$lhs) > 1 - if(is.null(lhs)) { - R <- obj$r.residuals-obj$residuals + if (is.null(lhs)) { + R <- obj$r.residuals - obj$residuals } else { - if(!all(lhs %in% obj$lhs)) stop('lhs must be subset of ', paste(obj$lhs, collapse=' ')) - R <- obj$r.residuals[,lhs, drop=FALSE] - obj$residuals[,lhs, drop=FALSE] + if (!all(lhs %in% obj$lhs)) stop("lhs must be subset of ", paste(obj$lhs, collapse = " ")) + R <- obj$r.residuals[, lhs, drop = FALSE] - 
obj$residuals[, lhs, drop = FALSE] } - v <- kaczmarz(obj$fe,R,eps) + v <- kaczmarz(obj$fe, R, eps) - if(is.matrix(v) && ncol(v) > 1) { - v <- apply(v,2,ef,addnames=TRUE) - vtmp <- ef(v[,1],addnames=TRUE) - extra <- attr(vtmp, 'extra') + if (is.matrix(v) && ncol(v) > 1) { + v <- apply(v, 2, ef, addnames = TRUE) + vtmp <- ef(v[, 1], addnames = TRUE) + extra <- attr(vtmp, "extra") nm <- names(vtmp) } else { - v <- ef(v,TRUE) - extra <- attr(v,'extra') + v <- ef(v, TRUE) + extra <- attr(v, "extra") nm <- names(v) } - res <- data.frame(effect=v) - if(multlhs) colnames(res) <- paste('effect',colnames(R),sep='.') - if(!is.null(extra)) res <- cbind(res,extra) + res <- data.frame(effect = v) + if (multlhs) colnames(res) <- paste("effect", colnames(R), sep = ".") + if (!is.null(extra)) res <- cbind(res, extra) rownames(res) <- nm - attr(res,'ef') <- ef + attr(res, "ef") <- ef - if(se) { - if(identical(inef, 'ref') && length(obj$fe) == 1 && robust == FALSE) { - if(multlhs) { - for(lh in colnames(R)) { - res[[paste('se',lh,sep='.')]] <- fixedse(obj,lhs=lh) + if (se) { + if (identical(inef, "ref") && length(obj$fe) == 1 && robust == FALSE) { + if (multlhs) { + for (lh in colnames(R)) { + res[[paste("se", lh, sep = ".")]] <- fixedse(obj, lhs = lh) } } else { - res[['se']] <- fixedse(obj) + res[["se"]] <- fixedse(obj) } - } else if(multlhs) { - for(lh in colnames(R)) { - res <- btrap(res,obj,bN,eps=eps, robust=robust, cluster=cluster, lhs=lh) + } else if (multlhs) { + for (lh in colnames(R)) { + res <- btrap(res, obj, bN, eps = eps, robust = robust, cluster = cluster, lhs = lh) } } else { - res <- btrap(res,obj,bN,eps=eps, robust=robust, cluster=cluster) + res <- btrap(res, obj, bN, eps = eps, robust = robust, cluster = cluster) } } res @@ -145,30 +144,32 @@ getfe.kaczmarz <- function(obj,se=FALSE,eps=getOption('lfe.eps'),ef='ref',bN=100 # return level names in appropriate order # the G(x:f) with x a matrix makes it slightly complicated -xlevels <- function(n,f,sep='.') { - x <- 
attr(f,'x',exact=TRUE) - plev <- paste(n,levels(f),sep=sep) - if(is.null(x) || !is.matrix(x)) return(plev) - nam <- attr(f,'xnam') - if(is.null(nam)) nam <- 'x' - if(!is.matrix(x)) return(paste(nam,plev,sep=sep)) +xlevels <- function(n, f, sep = ".") { + x <- attr(f, "x", exact = TRUE) + plev <- paste(n, levels(f), sep = sep) + if (is.null(x) || !is.matrix(x)) { + return(plev) + } + nam <- attr(f, "xnam") + if (is.null(nam)) nam <- "x" + if (!is.matrix(x)) { + return(paste(nam, plev, sep = sep)) + } matnam <- colnames(x) - if(is.null(matnam)) matnam <- paste(nam,1:ncol(x),sep='') else matnam <- paste(nam,matnam,sep='') - plev <- sub('.*:','',plev) - return(as.vector(t(outer(matnam,plev,paste,sep=':')))) + if (is.null(matnam)) matnam <- paste(nam, 1:ncol(x), sep = "") else matnam <- paste(nam, matnam, sep = "") + plev <- sub(".*:", "", plev) + return(as.vector(t(outer(matnam, plev, paste, sep = ":")))) } -nxlevels <- function(n,f) { - x <- attr(f,'x',exact=TRUE) - plev <- rep(n,nlevels(f)) - if(is.null(x) || !is.matrix(x)) return(plev) - nam <- attr(f,'xnam') +nxlevels <- function(n, f) { + x <- attr(f, "x", exact = TRUE) + plev <- rep(n, nlevels(f)) + if (is.null(x) || !is.matrix(x)) { + return(plev) + } + nam <- attr(f, "xnam") matnam <- colnames(x) - if(is.null(matnam)) matnam <- paste(nam,1:ncol(x),sep='') else matnam <- paste(nam,matnam,sep='') - plev <- sub('.*:','',plev) - return(as.vector(t(outer(matnam,plev,paste,sep=':')))) + if (is.null(matnam)) matnam <- paste(nam, 1:ncol(x), sep = "") else matnam <- paste(nam, matnam, sep = "") + plev <- sub(".*:", "", plev) + return(as.vector(t(outer(matnam, plev, paste, sep = ":")))) } - - - - diff --git a/R/lfe-package.R b/R/lfe-package.R index 1521fe5..c8b0e41 100644 --- a/R/lfe-package.R +++ b/R/lfe-package.R @@ -1,18 +1,18 @@ #' Overview. Linear Group Fixed Effects -#' +#' #' The package uses the Method of Alternating Projections to estimate linear #' models with multiple group fixed effects. 
A generalization of the within #' estimator. It supports IV-estimation with multiple endogenous variables via #' 2SLS, with conditional F statistics for detection of weak instruments. It is #' thread-parallelized and intended for large problems. A method for correcting #' limited mobility bias is also included. -#' -#' +#' +#' #' This package is intended for linear models with multiple group fixed #' effects, i.e. with 2 or more factors with a large number of levels. It -#' performs similar functions as \code{\link[stats]{lm}}, but it uses a special +#' performs similar functions as [stats::lm()], but it uses a special #' method for projecting out multiple group fixed effects from the normal #' equations, hence it is faster. It is a generalization of the within #' estimator. This may be required if the groups have high cardinality (many @@ -23,168 +23,169 @@ #' memory-consuming process compared to finding the point estimates. If you #' only have a single huge factor, the package \pkg{plm} is probably better #' suited. If your factors don't have thousands of levels, -#' \code{\link[stats]{lm}} or other packages are probably better suited. -#' \pkg{lfe} is designed to produce the same results as \code{\link[stats]{lm}} +#' [stats::lm()] or other packages are probably better suited. +#' \pkg{lfe} is designed to produce the same results as [stats::lm()] #' will do if run with the full set of dummies. -#' +#' #' Projecting out interactions between continuous covariates and factors is #' supported. I.e. individual slopes, not only individual intercepts. Multiple #' left hand sides are supported. #' #' The package does not support non-linear models. For GLMs with many dummies there #' is a package \pkg{alpaca} which uses similar methods to project them out. -#' +#' #' The estimation is done in two steps. 
First the other coefficients are -#' estimated with the function \code{\link{felm}} by centering on all the group +#' estimated with the function [felm()] by centering on all the group #' means, followed by an OLS (similar to lm). Then the group effects are -#' extracted (if needed) with the function \code{\link{getfe}}. This method is +#' extracted (if needed) with the function [getfe()]. This method is #' described by \cite{Gaure (2013)}, but also appears in \cite{Guimaraes and #' Portugal (2010)}, disguised as the Gauss-Seidel algorithm. -#' -#' There's also a function \code{\link{demeanlist}} which just does the +#' +#' There's also a function [demeanlist()] which just does the #' centering on an arbitrary matrix or data frame, and there's a function -#' \code{\link{compfactor}} which computes the connected components which are +#' [compfactor()] which computes the connected components which are #' used for interpreting the group effects when there are only two factors (see -#' the Abowd et al references), they are also returned by \code{\link{getfe}}. -#' +#' the Abowd et al references), they are also returned by [getfe()]. +#' #' For those who study the correlation between the fixed effects, like in -#' \cite{Abowd et al. (1999)}, there are functions \code{\link{bccorr}} and -#' \code{\link{fevcov}} for computing limited mobility bias corrected +#' \cite{Abowd et al. (1999)}, there are functions [bccorr()] and +#' [fevcov()] for computing limited mobility bias corrected #' correlations and variances with the method described in \cite{Gaure #' (2014b)}. -#' +#' #' Instrumental variable estimations are supported with 2SLS. Conditional F #' statistics for testing reduced rank weak instruments as in \cite{Sanderson -#' and Windmeijer (2015)} are available in \code{\link{condfstat}}. Joint -#' signficance testing of coefficients is available in \code{\link{waldtest}}. -#' +#' and Windmeijer (2015)} are available in [condfstat()]. 
Joint +#' significance testing of coefficients is available in [waldtest()]. +#' #' The centering on the means is done with a tolerance which is set by -#' \code{options(lfe.eps=1e-8)} (the default). This is a somewhat conservative -#' tolerance, in many cases I'd guess \code{1e-6} may be sufficient. This may +#' `options(lfe.eps=1e-8)` (the default). This is a somewhat conservative +#' tolerance, in many cases I'd guess `1e-6` may be sufficient. This may #' speed up the centering. In the other direction, setting -#' \code{options(lfe.eps=0)} will provide maximum accuracy at the cost of +#' `options(lfe.eps=0)` will provide maximum accuracy at the cost of #' computing time and warnings about convergence failure. -#' +#' #' The package is threaded, that is, it may use more than one cpu. The number #' of threads is fetched upon loading the package from the environment variable #' \env{LFE_THREADS}, \env{OMP_THREAD_LIMIT}, \env{OMP_NUM_THREADS} or #' \env{NUMBER_OF_PROCESSORS} (for Windows), and stored by -#' \code{options(lfe.threads=n)}. This option can be changed prior to calling -#' \code{\link{felm}}, if so desired. Note that, typically, \pkg{lfe} is +#' `options(lfe.threads=n)`. This option can be changed prior to calling +#' [felm()], if so desired. Note that, typically, \pkg{lfe} is #' limited by memory bandwidth, not cpu speed, thus fast memory and large cache #' is more important than clock frequency. It is therefore also not always true #' that running on all available cores is much better than running on half of #' them. -#' +#' #' Threading is only done for the centering; the extraction of the group #' effects is not threaded. The default method for extracting the group #' coefficients is the iterative Kaczmarz-method, its tolerance is also the -#' \code{lfe.eps} option. For some datasets the Kaczmarz-method is converging +#' `lfe.eps` option. 
For some datasets the Kaczmarz-method is converging #' very slowly, in this case it may be replaced with a conjugate gradient -#' method by setting the option \code{options(lfe.usecg=TRUE)}. Various +#' method by setting the option `options(lfe.usecg=TRUE)`. Various #' time-consuming parts of \pkg{lfe} may print progress reports, the minimum -#' interval in seconds is \code{options(lfe.pint=1800)}. -#' +#' interval in seconds is `options(lfe.pint=1800)`. +#' #' The package has been tested on datasets with approx 20,000,000 observations #' with 15 covariates and approx 2,300,000 and 270,000 group levels (the -#' \code{\link{felm}} took about 50 minutes on 8 cpus, the \code{\link{getfe}} +#' [felm()] took about 50 minutes on 8 cpus, the [getfe()] #' takes 5 minutes). Though, beware that not only the size of the dataset #' matters, but also its structure, as demonstrated by \cite{Gaure (2014a)}. -#' +#' #' The package will work with any number of grouping factors, but if more than #' two, their interpretation is in general not well understood, i.e. one should #' make sure that the group coefficients are estimable. A discussion of #' estimability, the algorithm used, and convergence rate are available in #' vignettes, as well as in the published papers in the citation list -#' (\code{citation('lfe')}). -#' -#' In the exec-directory there is a perl-script \code{lfescript} which is used +#' (`citation('lfe')`). +#' +#' In the exec-directory there is a perl-script `lfescript` which is used #' at the author's site for automated creation of R-scripts from a simple -#' specification file. The format is documented in \code{doc/lfeguide.txt}. -#' +#' specification file. The format is documented in `doc/lfeguide.txt`. +#' #' \pkg{lfe} is similar in function, though not in method, to the Stata modules -#' \code{a2reg} and \code{felsdvreg}. The method is very similar to the one in -#' the Stata module \code{reghdfe}. -#' +#' `a2reg` and `felsdvreg`. 
The method is very similar to the one in +#' the Stata module `reghdfe`. +#' #' @name lfe-package #' @aliases lfe-package lfe #' @docType package #' @references Abowd, J.M., F. Kramarz and D.N. Margolis (1999) \cite{High Wage #' Workers and High Wage Firms}, Econometrica 67 (1999), no. 2, 251--333. -#' \url{http://dx.doi.org/10.1111/1468-0262.00020} -#' +#' \doi{10.1111/1468-0262.00020} +#' #' Abowd, J.M., R. Creecy and F. Kramarz (2002) \cite{Computing Person and Firm #' Effects Using Linked Longitudinal Employer-Employee Data.} Technical Report #' TP-2002-06, U.S. Census Bureau. -#' \url{https://www2.census.gov/ces/tp/tp-2002-06.pdf} -#' +#' +#' #' Andrews, M., L. Gill, T. Schank and R. Upward (2008) \cite{High wage workers #' and low wage firms: negative assortative matching or limited mobility bias?} #' J.R. Stat. Soc.(A) 171(3), 673--697. -#' \url{http://dx.doi.org/10.1111/j.1467-985X.2007.00533.x} -#' +#' \doi{10.1111/j.1467-985X.2007.00533.x} +#' #' Cornelissen, T. (2008) \cite{The stata command felsdvreg to fit a linear #' model with two high-dimensional fixed effects.} Stata Journal, #' 8(2):170--189, 2008. -#' \url{http://econpapers.repec.org/RePEc:tsj:stataj:v:8:y:2008:i:2:p:170-189} -#' +#' +#' #' Correia, S. (2014) \cite{REGHDFE: Stata module to perform linear or #' instrumental-variable regression absorbing any number of high-dimensional #' fixed effects}, Statistical Software Components, Boston College Department -#' of Economics. \url{http://econpapers.repec.org/RePEc:boc:bocode:s457874} -#' +#' of Economics. +#' #' Croissant, Y. and G. Millo (2008) \cite{Panel Data Econometrics in R: The #' plm Package}, Journal of Statistical Software, 27(2). -#' \url{http://www.jstatsoft.org/v27/i02/} -#' +#' +#' #' Gaure, S. 
(2013) \cite{OLS with Multiple High Dimensional Category #' Variables.} Computational Statistics and Data Analysis, 66:8--18, 2013 -#' \url{http://dx.doi.org/10.1016/j.csda.2013.03.024} -#' +#' \doi{10.1016/j.csda.2013.03.024} +#' #' Gaure, S. (2014a) \cite{lfe: Linear Group Fixed Effects.} The R Journal, #' 5(2):104-117, Dec 2013. -#' \url{https://journal.r-project.org/archive/2013/RJ-2013-031/RJ-2013-031.pdf} -#' +#' +#' #' Gaure, S. (2014b), \cite{Correlation bias correction in two-way #' fixed-effects linear regression}, Stat 3(1):379-390, 2014. -#' \url{http://dx.doi.org/10.1002/sta4.68} -#' +#' \doi{10.1002/sta4.68} +#' #' Guimaraes, P. and Portugal, P. (2010) \cite{A simple feasible procedure to #' fit models with high-dimensional fixed effects.} The Stata Journal, #' 10(4):629--649, 2010. -#' \url{http://www.stata-journal.com/article.html?article=st0212} -#' +#' +#' #' Ouazad, A. (2008) \cite{A2REG: Stata module to estimate models with two #' fixed effects.} Statistical Software Components S456942, Boston College #' Department of Economics. -#' \url{http://ideas.repec.org/c/boc/bocode/s456942.html} -#' +#' +#' #' Sanderson, E. and F. Windmeijer (2014) \cite{A weak instrument F-test in #' linear IV models with multiple endogenous variables}, Journal of #' Econometrics, 2015. 
-#' \url{http://www.sciencedirect.com/science/article/pii/S0304407615001736} +#' #' @keywords regression models #' @examples -#' -#' oldopts <- options(lfe.threads=1) -#' x <- rnorm(1000) -#' x2 <- rnorm(length(x)) -#' id <- factor(sample(10,length(x),replace=TRUE)) -#' firm <- factor(sample(3,length(x),replace=TRUE,prob=c(2,1.5,1))) -#' year <- factor(sample(10,length(x),replace=TRUE,prob=c(2,1.5,rep(1,8)))) -#' id.eff <- rnorm(nlevels(id)) -#' firm.eff <- rnorm(nlevels(firm)) -#' year.eff <- rnorm(nlevels(year)) -#' y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + -#' year.eff[year] + rnorm(length(x)) -#' est <- felm(y ~ x+x2 | id + firm + year) -#' summary(est) -#' -#' getfe(est,se=TRUE) +#' +#' oldopts <- options("lfe.threads") +#' options(lfe.threads = 2) +#' x <- rnorm(1000) +#' x2 <- rnorm(length(x)) +#' id <- factor(sample(10, length(x), replace = TRUE)) +#' firm <- factor(sample(3, length(x), replace = TRUE, prob = c(2, 1.5, 1))) +#' year <- factor(sample(10, length(x), replace = TRUE, prob = c(2, 1.5, rep(1, 8)))) +#' id.eff <- rnorm(nlevels(id)) +#' firm.eff <- rnorm(nlevels(firm)) +#' year.eff <- rnorm(nlevels(year)) +#' y <- x + 0.25 * x2 + id.eff[id] + firm.eff[firm] + +#' year.eff[year] + rnorm(length(x)) +#' est <- felm(y ~ x + x2 | id + firm + year) +#' summary(est) +#' +#' getfe(est, se = TRUE) #' # compare with an ordinary lm -#' summary(lm(y ~ x+x2+id+firm+year-1)) -#' options(oldopts) -#' +#' summary(lm(y ~ x + x2 + id + firm + year - 1)) +#' options(oldopts) +#' #' @useDynLib lfe, .registration=TRUE, .fixes='C_' #' @importFrom methods as #' @importFrom xtable xtable @@ -194,6 +195,3 @@ #' @importFrom Matrix t Diagonal rankMatrix Cholesky nnzero crossprod tcrossprod diag #' @importClassesFrom Matrix sparseMatrix NULL - - - diff --git a/R/nlexpect.R b/R/nlexpect.R index 35b132d..9674014 100644 --- a/R/nlexpect.R +++ b/R/nlexpect.R @@ -4,278 +4,286 @@ #' Compute expectation of a function of the coefficients. 
-#' +#' #' Use integration of the joint distribution of the coefficients to compute the #' expectation of some function of the coefficients. Can be used for #' non-linear inference tests. -#' -#' The function \code{nlexpect} integrates the function \code{fun(x)} over the +#' +#' The function `nlexpect` integrates the function `fun(x)` over the #' multivariate normal distribution specified by the point estimates and the -#' covariance matrix \code{vcov(est)}. This is the expectation of -#' \code{fun(beta)} if we were to bootstrap the data (e.g. by drawing the +#' covariance matrix `vcov(est)`. This is the expectation of +#' `fun(beta)` if we were to bootstrap the data (e.g. by drawing the #' residuals anew) and do repeated estimations. -#' -#' The list of coefficients used by \code{fun} must be specified in -#' \code{coefs}. -#' +#' +#' The list of coefficients used by `fun` must be specified in +#' `coefs`. +#' #' If the function is simple, it can be specified as a quoted expression like -#' \code{quote(a*b+log(abs(d)))}. In this case, if \code{coefs} is not -#' specified, it will be set to the list of all the variables occuring in the +#' `quote(a*b+log(abs(d)))`. In this case, if `coefs` is not +#' specified, it will be set to the list of all the variables occurring in the #' expression which are also names of coefficients. -#' -#' \code{fun} may return a vector of values, in which case a vector of -#' expectations is computed, like \code{quote(c(a*b, a^3-b))}. However, if the -#' expressions contain different variables, like \code{quote(c(a*b, d*e))}, a +#' +#' `fun` may return a vector of values, in which case a vector of +#' expectations is computed, like `quote(c(a*b, a^3-b))`. However, if the +#' expressions contain different variables, like `quote(c(a*b, d*e))`, a #' quite compute intensive 4-dimensional integral will be computed, compared to #' two cheap 2-dimensional integrals if you do them separately. 
There is nothing to gain -#' from using vector-valued functions compared to multiple calls to \code{nlexpect()}. -#' -#' You may of course also integrate inequalites like \code{quote(abs(x1-0.2) > -#' 0.2)} to simulate the probability from t-tests or Wald-tests. See the +#' from using vector-valued functions compared to multiple calls to `nlexpect()`. +#' +#' You may of course also integrate inequalities like `quote(abs(x1-0.2) > +#' 0.2)` to simulate the probability from t-tests or Wald-tests. See the #' examples. -#' -#' The function you provide will get an argument \code{...} if it does not have -#' one already. It will also be passed an argument \code{.z} which contains -#' the actual coefficients in normalized coordinates, i.e. if \code{ch} is the -#' Cholesky decomposition of the covariance matrix, and \code{pt} are the point -#' estimates, the coefficients will be \code{pt + ch \%*\% .z}. The first argument +#' +#' The function you provide will get an argument `...` if it does not have +#' one already. It will also be passed an argument `.z` which contains +#' the actual coefficients in normalized coordinates, i.e. if `ch` is the +#' Cholesky decomposition of the covariance matrix, and `pt` are the point +#' estimates, the coefficients will be `pt + ch \%*\% .z`. The first argument #' is a vector with names corresponding to the coefficients. #' -#' If you specify \code{vectorized=TRUE}, your function will be passed a list with vectors +#' If you specify `vectorize=TRUE`, your function will be passed a list with vectors #' in its first argument. The function must #' be able to handle a list, and must return a vector of the same length as the vectors -#' in the list. If you pass an expression like \code{x < y}, each variable will be a vector. +#' in the list. If you pass an expression like `x < y`, each variable will be a vector. #' If your function is vector valued, it must return a matrix where each #' column is the values.
-#' -#' The \code{tol} argument specifies both the relative tolerance and the -#' absolute tolerance. If these should not be the same, specify \code{tol} as a +#' +#' The `tol` argument specifies both the relative tolerance and the +#' absolute tolerance. If these should not be the same, specify `tol` as a #' vector of length 2. The first value is the relative tolerance, the second is #' the absolute tolerance. Termination occurs when at least one of the #' tolerances is met. -#' -#' The \code{...} can be used for passing other arguments to the integration -#' routine \code{cubature::cubintegrate} and the function to be integrated. -#' -#' @param est object of class \code{"felm"} or \code{"lm"}, a result of a call to -#' \code{\link{felm}} or \code{lm}. +#' +#' The `...` can be used for passing other arguments to the integration +#' routine `cubature::cubintegrate` and the function to be integrated. +#' +#' @param est object of class `"felm"` or `"lm"`, a result of a call to +#' [felm()] or `lm`. #' @param fun function of coefficients to be integrated. Can also be a -#' \code{quote}d expression. +#' `quote`d expression. #' @param coefs character. Names of coefficients to test. Only needed if -#' \code{fun} is a function. +#' `fun` is a function. #' @param ... other arguments passed to fun or the integration routine. #' @param tol numeric. Tolerance for the computed integral. -#' @param lhs character. Name of the left hand side, if \code{est} has more +#' @param lhs character. Name of the left hand side, if `est` has more #' than one. -#' @param cv Covariance matrix to use in place of \code{vcov(est)} +#' @param cv Covariance matrix to use in place of `vcov(est)` #' @param istats logical. Should convergence information from the integration #' routine be included as attributes? #' @param flags list. Additional flags for the underlying integration routine. Not used after the #' package \pkg{R2Cuba} disappeared. #' @param max.eval integer. 
Maximum number of integral evaluations. -#' @param method character. A method specification usable by \code{cubature::cubintegrate}. -#' The documentation there says that \code{"pcubature"} is good for smooth integrands of low dimensions. +#' @param method character. A method specification usable by `cubature::cubintegrate`. +#' The documentation there says that `"pcubature"` is good for smooth integrands of low dimensions. #' @param vectorize logical or numeric. Use vectorized function evaluation from package #' \pkg{cubature}. This can speed up integration significantly. If method is from the Cuba library -#' (i.e. not pcubature or hcubature), \code{vectorize} should be specified as a numeric, a vectorization +#' (i.e. not pcubature or hcubature), `vectorize` should be specified as a numeric, a vectorization #' factor. The default is 128. -#' @return The function \code{nlexpect} computes and returns the expectation of -#' the function \code{fun(beta)}, with \code{beta} a vector of coefficients. -#' I.e., if the coefficients \code{beta} are bootstrapped a large number of -#' times, \code{nlexpect(est, fun)} should be equal to \code{mean(fun(beta))}. -#' @note An alternative to this method is to use the \code{bootexpr} argument -#' with \code{\link{felm}}, to do a Monte Carlo integration. -#' -#' @seealso \code{\link{waldtest}} +#' @return The function `nlexpect` computes and returns the expectation of +#' the function `fun(beta)`, with `beta` a vector of coefficients. +#' I.e., if the coefficients `beta` are bootstrapped a large number of +#' times, `nlexpect(est, fun)` should be equal to `mean(fun(beta))`. +#' @note An alternative to this method is to use the `bootexpr` argument +#' with [felm()], to do a Monte Carlo integration. 
+#' +#' @seealso [waldtest()] #' @examples -#' +#' #' N <- 100 #' x1 <- rnorm(N) #' # make some correlation -#' x2 <- 0.1*rnorm(N) + 0.1*x1 -#' y <- 0.1*x1 + x2 + rnorm(N) +#' x2 <- 0.1 * rnorm(N) + 0.1 * x1 +#' y <- 0.1 * x1 + x2 + rnorm(N) #' summary(est <- felm(y ~ x1 + x2)) -#' pt1 <- coef(est)['x1'] -#' pt2 <- coef(est)['x2'] +#' pt1 <- coef(est)["x1"] +#' pt2 <- coef(est)["x2"] #' # expected values of coefficients, should match the summary #' # and variance, i.e. square of standard errors in the summary -#' nlexpect(est, quote(c(x1=x1,x2=x2,var=c((x1-pt1)^2,(x2-pt2)^2)))) +#' nlexpect(est, quote(c(x1 = x1, x2 = x2, var = c((x1 - pt1)^2, (x2 - pt2)^2)))) #' \donttest{ #' # the covariance matrix: -#' nlexpect(est, tcrossprod(as.matrix(c(x1-pt1,x2-pt2)))) +#' nlexpect(est, tcrossprod(as.matrix(c(x1 - pt1, x2 - pt2)))) #' } -#' #Wald test of single variable -#' waldtest(est, ~x1)['p.F'] +#' # Wald test of single variable +#' waldtest(est, ~x1)["p.F"] #' # the same with nlexpect, i.e. probability for observing abs(x1)>abs(pt1) conditional #' # on E(x1) = 0. -#' nlexpect(est, (x1-pt1)^2 > pt1^2, tol=1e-7, vectorize=TRUE) +#' nlexpect(est, (x1 - pt1)^2 > pt1^2, tol = 1e-7, vectorize = TRUE) #' # which is the same as -#' 2*nlexpect(est, x1*sign(pt1) < 0) +#' 2 * nlexpect(est, x1 * sign(pt1) < 0) #' #' # Here's a multivalued, vectorized example -#' nlexpect(est, rbind(a=x1*x2 < pt1, b=x1*x2 > 0), vectorize=TRUE, method='divonne') +#' nlexpect(est, rbind(a = x1 * x2 < pt1, b = x1 * x2 > 0), vectorize = TRUE, method = "divonne") #' \donttest{ -#' +#' #' # Non-linear test: #' #' # A simple one, what's the probability that product x1*x2 is between 0 and |E(x1)|? 
-#' nlexpect(est, x1*x2 > 0 & x1*x2 < abs(pt1), vectorize=TRUE, method='divonne') +#' nlexpect(est, x1 * x2 > 0 & x1 * x2 < abs(pt1), vectorize = TRUE, method = "divonne") #' # Then a more complicated one with the expected value of a polynomal in the coefficients -#' f <- function(x) c(poly=x[['x1']]*(6*x[['x1']]-x[['x2']]^2)) +#' f <- function(x) c(poly = x[["x1"]] * (6 * x[["x1"]] - x[["x2"]]^2)) #' # This is the linearized test: -#' waldtest(est, f)['p.F'] +#' waldtest(est, f)["p.F"] #' # In general, for a function f, the non-linear Wald test is something like #' # the following: #' # expected value of function -#' Ef <- nlexpect(est, f, coefs=c('x1','x2')) +#' Ef <- nlexpect(est, f, coefs = c("x1", "x2")) #' # point value of function -#' Pf <- f(c(pt1,pt2)) +#' Pf <- f(c(pt1, pt2)) #' # similar to a Wald test, but non-linear: -#' nlexpect(est, function(x) (f(x)-Ef)^2 > Pf^2, c('x1','x2'), vectorize=TRUE) +#' nlexpect(est, function(x) (f(x) - Ef)^2 > Pf^2, c("x1", "x2"), vectorize = TRUE) #' # one-sided -#' nlexpect(est, function(x) f(x)-Ef > abs(Pf), c('x1','x2'), vectorize=TRUE) +#' nlexpect(est, function(x) f(x) - Ef > abs(Pf), c("x1", "x2"), vectorize = TRUE) #' # other sided -#' nlexpect(est, function(x) f(x)-Ef < -abs(Pf), c('x1','x2'), vectorize=TRUE) +#' nlexpect(est, function(x) f(x) - Ef < -abs(Pf), c("x1", "x2"), vectorize = TRUE) #' } -#' +#' #' @export nlexpect -nlexpect <- function(est, fun, coefs, ..., tol=getOption('lfe.etol'), lhs=NULL, - cv, istats=FALSE, flags=list(verbose=0), max.eval=200000L, - method=c('hcubature','pcubature','cuhre','suave','vegas','divonne'), - vectorize=FALSE) { - - mc <- match.call(expand.dots=FALSE) - xargs <- names(mc[['...']]) +nlexpect <- function(est, fun, coefs, ..., tol = getOption("lfe.etol"), lhs = NULL, + cv, istats = FALSE, flags = list(verbose = 0), max.eval = 200000L, + method = c("hcubature", "pcubature", "cuhre", "suave", "vegas", "divonne"), + vectorize = FALSE) { + mc <- match.call(expand.dots = FALSE) 
+ xargs <- names(mc[["..."]]) method <- match.arg(method) - if(!requireNamespace('cubature', quietly=TRUE)) { + if (!requireNamespace("cubature", quietly = TRUE)) { warning('Package "cubature" not found.') return(NULL) } -# adapt <- TRUE -# adaptig <- switch(method,hcubature=cubature::hcubature,pcubature=cubature::pcubature, -# cuhre=cubature::cuhre,suave=cubature::suave, vegas=cubature::vegas) + # adapt <- TRUE + # adaptig <- switch(method,hcubature=cubature::hcubature,pcubature=cubature::pcubature, + # cuhre=cubature::cuhre,suave=cubature::suave, vegas=cubature::vegas) - if(isTRUE(est$nostats) && missing(cv)) - stop('This test requires that felm() is run without nostats=TRUE; or specify a cv argument') + if (isTRUE(est$nostats) && missing(cv)) { + stop("This test requires that felm() is run without nostats=TRUE; or specify a cv argument") + } # Find the covariance matrix - if(missing(cv)) cv <- vcov(est, lhs=lhs) + if (missing(cv)) cv <- vcov(est, lhs = lhs) # Some kludge to be able to use non-quoted expression for fun afun <- substitute(fun) - if(is.call(afun) || (is.name(afun) && (as.character(afun) %in% colnames(cv)))) { + if (is.call(afun) || (is.name(afun) && (as.character(afun) %in% colnames(cv)))) { lfun <- as.list(afun) - if(identical(lfun[[1]], quote(expression))) - fun <- as.call(lfun[[2]]) - else if(!identical(lfun[[1]],quote(quote)) && !identical(lfun[[1]],quote(`function`))) - fun <- afun + if (identical(lfun[[1]], quote(expression))) { + fun <- as.call(lfun[[2]]) + } else if (!identical(lfun[[1]], quote(quote)) && !identical(lfun[[1]], quote(`function`))) { + fun <- afun + } } - if(is.call(fun) || is.name(fun)) { + if (is.call(fun) || is.name(fun)) { # it's an expression. Figure out the coefficients used - if(missing(coefs)) coefs <- intersect(all.vars(fun),colnames(cv)) + if (missing(coefs)) coefs <- intersect(all.vars(fun), colnames(cv)) # make it a function - fun <- local(function(x, ...) 
eval(fun,c(as.list(x),list(...))), list(fun=fun)) - } else if(is.function(fun)) { + fun <- local(function(x, ...) eval(fun, c(as.list(x), list(...))), list(fun = fun)) + } else if (is.function(fun)) { fa <- formals(fun) -# nomatch <- !(xargs %in% names(fa)[-1]) -# if(any(nomatch)) -# warning('arguments ',paste(xargs[nomatch],collapse='/'), -# ' not among arguments in function to integrate') + # nomatch <- !(xargs %in% names(fa)[-1]) + # if(any(nomatch)) + # warning('arguments ',paste(xargs[nomatch],collapse='/'), + # ' not among arguments in function to integrate') - #add a ... formal if it doesn't exist - if(!('.z' %in% names(fa))) - formals(fun) <- c(fa,alist(.z=)) - if(!('...' %in% names(fa))) - formals(fun) <- c(fa,alist(...=)) + # add a ... formal if it doesn't exist + if (!(".z" %in% names(fa))) { + formals(fun) <- c(fa, alist(.z = )) + } + if (!("..." %in% names(fa))) { + formals(fun) <- c(fa, alist(... = )) + } } - if(missing(coefs) || length(coefs %in% colnames(cv)) == 0) - stop('No coefficients specified') + if (missing(coefs) || length(coefs %in% colnames(cv)) == 0) { + stop("No coefficients specified") + } # Find the coefficients - cf <- drop(coef(est, lhs=lhs))[coefs] + cf <- drop(coef(est, lhs = lhs))[coefs] # and the Cholesky - ch <- chol(cv[coefs,coefs,drop=FALSE]) + ch <- chol(cv[coefs, coefs, drop = FALSE]) tch <- t(ch) # Now, we need to integrate fun(x) > 0 over the joint distribution of the parameters # We do this as follows. We integrate over a standard hypercube (-pi/2,pi/2) x (-pi/2,pi/2) x ... # adaptIntegrate can't take infinite limits. # We first transform these to (-Inf, Inf) with # z = tan(x) - # the Jacobian determinant becomes the product of + # the Jacobian determinant becomes the product of # 1/cos(x)^2 # We transform the integration variables with the covariance matrix to feed fun(), # then integrate fun(x) > 0 with the multivariate normal distribution. # we use the package cubature for the integration. 
K <- length(cf) - if(is.numeric(vectorize)) nvec = if(vectorize<=1) 1 else vectorize - if(isTRUE(vectorize)) nvec = 128 else nvec = 1 - if(nvec <= 1) { + if (is.numeric(vectorize)) nvec <- if (vectorize <= 1) 1 else vectorize + if (isTRUE(vectorize)) nvec <- 128 else nvec <- 1 + if (nvec <= 1) { integrand <- function(x, ...) { - jac <- prod(1/cos(x))^2 + jac <- prod(1 / cos(x))^2 z <- tan(x) # z is the standard normal (t really) multivariate # dens <- prod(dnorm(z)) - dens <- prod(dt(z,est$df)) + dens <- prod(dt(z, est$df)) beta <- drop(cf + tch %*% z) names(beta) <- coefs - ret <- fun(beta, .z=z, ...)*jac*dens - if(anyNA(ret)) stop('Function value is NA for argument: ',sprintf('%.2e ',beta)) + ret <- fun(beta, .z = z, ...) * jac * dens + if (anyNA(ret)) stop("Function value is NA for argument: ", sprintf("%.2e ", beta)) ret } - sv <- fun(cf, .z=rep(0,K), ...) + sv <- fun(cf, .z = rep(0, K), ...) } else { integrand <- function(x, ...) { z <- tan(x) - jac <- apply(x,2,function(x) prod(1/cos(x)))^2 - dens <- apply(z,2,function(zz) prod(dt(zz,est$df))) + jac <- apply(x, 2, function(x) prod(1 / cos(x)))^2 + dens <- apply(z, 2, function(zz) prod(dt(zz, est$df))) beta <- tch %*% z + cf - lbeta <- vector('list',K) - for(i in 1:nrow(beta)) lbeta[[i]] <- beta[i,] + lbeta <- vector("list", K) + for (i in 1:nrow(beta)) lbeta[[i]] <- beta[i, ] names(lbeta) <- coefs - ret <- fun(lbeta, .z=z, ...)*jac*dens - if(anyNA(ret)) stop('Function value is NA for argument: ',sprintf('%.2e ',beta)) - if(is.matrix(ret) && ncol(ret)==ncol(x)) ret else matrix(ret,ncol=ncol(x)) + ret <- fun(lbeta, .z = z, ...) * jac * dens + if (anyNA(ret)) stop("Function value is NA for argument: ", sprintf("%.2e ", beta)) + if (is.matrix(ret) && ncol(ret) == ncol(x)) ret else matrix(ret, ncol = ncol(x)) } - sv <- fun(as.list(cf), .z=matrix(0,K), ...) + sv <- fun(as.list(cf), .z = matrix(0, K), ...) 
} - + fdim <- length(sv) - if(length(tol) == 2) { + if (length(tol) == 2) { reltol <- tol[1] abstol <- tol[2] } else { reltol <- abstol <- tol } - eps <- 10*.Machine$double.eps - ret <- cubature::cubintegrate(integrand,rep(-pi/2-eps,K),rep(pi/2-eps,K),fDim=fdim, - method=method,relTol=reltol, - absTol=abstol,maxEval=max.eval,nVec=nvec,...) -# ret <- adaptig(integrand,rep(-pi/2-eps,K),rep(pi/2-eps,K),...,tol=reltol, -# absError=abstol,fDim=fdim,maxEval=max.eval,vectorInterface=vectorize) + eps <- 10 * .Machine$double.eps + ret <- cubature::cubintegrate(integrand, rep(-pi / 2 - eps, K), rep(pi / 2 - eps, K), + fDim = fdim, + method = method, relTol = reltol, + absTol = abstol, maxEval = max.eval, nVec = nvec, ... + ) + # ret <- adaptig(integrand,rep(-pi/2-eps,K),rep(pi/2-eps,K),...,tol=reltol, + # absError=abstol,fDim=fdim,maxEval=max.eval,vectorInterface=vectorize) names(ret$integral) <- names(sv) - if(is.array(sv)) { + if (is.array(sv)) { dim(ret$integral) <- dim(sv) dimnames(ret$integral) <- dimnames(sv) } - if(!istats) return(ret$integral) - - names(ret)[match('integral',names(ret))] <- names(as.list(args(structure)))[1] - return(do.call(structure,ret)) + if (!istats) { + return(ret$integral) + } + + names(ret)[match("integral", names(ret))] <- names(as.list(args(structure)))[1] + return(do.call(structure, ret)) -# ret <- R2ig(K,fdim,integrand,lower=rep(-1,K),..., -# upper=rep(1,K),rel.tol=reltol,abs.tol=abstol, -# flags=flags, max.eval=max.eval) -# names(ret$value) <- names(sv) -# if(is.array(sv)) { -# dim(ret$value) <- dim(sv) -# dimnames(ret$value) <- dimnames(sv) -# } -# if(ret$ifail != 0) warning('integration failed with: "',ret$message, '", use istats=TRUE to see details') -# if(!istats) return(ret$value) -# names(ret)[match('value',names(ret))] <- names(as.list(args(structure)))[1] -# do.call(structure,ret) + # ret <- R2ig(K,fdim,integrand,lower=rep(-1,K),..., + # upper=rep(1,K),rel.tol=reltol,abs.tol=abstol, + # flags=flags, max.eval=max.eval) + # 
names(ret$value) <- names(sv) + # if(is.array(sv)) { + # dim(ret$value) <- dim(sv) + # dimnames(ret$value) <- dimnames(sv) + # } + # if(ret$ifail != 0) warning('integration failed with: "',ret$message, '", use istats=TRUE to see details') + # if(!istats) return(ret$value) + # names(ret)[match('value',names(ret))] <- names(as.list(args(structure)))[1] + # do.call(structure,ret) } diff --git a/R/oldfelm.R b/R/oldfelm.R index bd668bd..d516cf2 100644 --- a/R/oldfelm.R +++ b/R/oldfelm.R @@ -12,52 +12,56 @@ # when the problem is *large*. # In other cases it may simply be due to the author's unfamiliarity with how # things should be done in R - -# parse our formula -oldparseformula <- function(formula,data) { - - trm <- terms(formula,specials=c('G')) - feidx <- attr(trm,'specials')$G+1 - va <- attr(trm,'variables') - festr <- paste(sapply(feidx,function(i) deparse(va[[i]])),collapse='+') - if(festr != '') { - .Deprecated(msg="The G() syntax is deprecated, please use multipart formulas instead") +# parse our formula +oldparseformula <- function(formula, data) { + trm <- terms(formula, specials = c("G")) + feidx <- attr(trm, "specials")$G + 1 + va <- attr(trm, "variables") + festr <- paste(sapply(feidx, function(i) deparse(va[[i]])), collapse = "+") + + if (festr != "") { + .Deprecated(msg = "The G() syntax is deprecated, please use multipart formulas instead") # remove the G-terms from formula - formula <- update(formula,paste('. ~ . -(',festr,') - 1')) - + formula <- update(formula, paste(". ~ . 
-(", festr, ") - 1")) + # then make a list of them, and find their names - felist <- parse(text=paste('list(',gsub('+',',',festr,fixed=TRUE),')',sep='')) - nm <- eval(felist,list(G=function(arg) deparse(substitute(arg)))) - + felist <- parse(text = paste("list(", gsub("+", ",", festr, fixed = TRUE), ")", sep = "")) + nm <- eval(felist, list(G = function(arg) deparse(substitute(arg)))) + # replace G with factor, eval with this, and the parent frame, or with data # allow interaction factors with '*' (dropped, never documented, use ':') - Gfunc <- function(f) if(is.null(attr(f,'xnam'))) factor(f) else f - Ginfunc <- function(x,f) { - if(is.factor(x)) { - structure(interaction(factor(f),factor(x),drop=TRUE),xnam=deparse(substitute(x)),fnam=deparse(substitute(f))) + Gfunc <- function(f) if (is.null(attr(f, "xnam"))) factor(f) else f + Ginfunc <- function(x, f) { + if (is.factor(x)) { + structure(interaction(factor(f), factor(x), drop = TRUE), xnam = deparse(substitute(x)), fnam = deparse(substitute(f))) } else { - structure(factor(f),x=x,xnam=deparse(substitute(x)), fnam=deparse(substitute(f))) + structure(factor(f), x = x, xnam = deparse(substitute(x)), fnam = deparse(substitute(f))) } } - - if(is.environment(data)) { - fl <- eval(felist,list(G=Gfunc, ':'=Ginfunc),data) + + if (is.environment(data)) { + fl <- eval(felist, list(G = Gfunc, ":" = Ginfunc), data) } else { - fl <- local({eval(felist,data)},list(G=Gfunc, ':'=Ginfunc)) + fl <- local( + { + eval(felist, data) + }, + list(G = Gfunc, ":" = Ginfunc) + ) } names(fl) <- nm - gpart <- eval(parse(text=paste('~',paste(nm,collapse='+')))) + gpart <- eval(parse(text = paste("~", paste(nm, collapse = "+")))) - if(is.null(names(fl))) names(fl) <- paste('fe',1:length(fl),sep='') + if (is.null(names(fl))) names(fl) <- paste("fe", 1:length(fl), sep = "") } else { fl <- NULL gpart <- ~0 } - return(list(formula=formula, fl=fl, gpart=gpart,ivpart=~0,cpart=~0)) + return(list(formula = formula, fl = fl, gpart = gpart, ivpart = 
~0, cpart = ~0)) } -# parse +# parse # use 2-part Formulas without G() syntax, like # y ~ x1 + x2 | f1+f2 # or 3-part or more Formulas with iv-specification like @@ -67,29 +71,33 @@ oldparseformula <- function(formula,data) { # fl = list(f1,f2) # ivpart = list(q ~x3+x4, w ~x3+x4) # cluster=list(c1,c2) -nopart <- function(x) length(all.vars(x))==0 +nopart <- function(x) length(all.vars(x)) == 0 -parseformula <- function(form, data, noexpand=FALSE) { +parseformula <- function(form, data, noexpand = FALSE) { f <- as.Formula(form) len <- length(f)[[2]] - if(len == 1) return(oldparseformula(form,data)) - opart <- formula(f,lhs=NULL,rhs=1) - if(len == 1) return(list(formula=opart,gpart=~0,ivpart=~0,cpart=~0)) + if (len == 1) { + return(oldparseformula(form, data)) + } + opart <- formula(f, lhs = NULL, rhs = 1) + if (len == 1) { + return(list(formula = opart, gpart = ~0, ivpart = ~0, cpart = ~0)) + } # the factor part - gpart <- formula(f,lhs=0, rhs=2) - if(!nopart(gpart)) { - tm <- terms(gpart, keep.order=TRUE) - ft <- attr(tm,'factors') - var <- eval(attr(tm,'variables'),data) + gpart <- formula(f, lhs = 0, rhs = 2) + if (!nopart(gpart)) { + tm <- terms(gpart, keep.order = TRUE) + ft <- attr(tm, "factors") + var <- eval(attr(tm, "variables"), data) varnames <- rownames(ft) names(var) <- varnames fl <- apply(ft, 2, function(v) { nonz <- sum(v > 0) vnam <- varnames[which(v > 0)] - if(nonz > 2) stop('Interaction only supported for two variables') - if(nonz == 1) { -# if(!is.factor(var[[vnam]])) warning('non-factor ',vnam, ' coerced to factor') + if (nonz > 2) stop("Interaction only supported for two variables") + if (nonz == 1) { + # if(!is.factor(var[[vnam]])) warning('non-factor ',vnam, ' coerced to factor') res <- list(factor(var[[vnam]])) names(res) <- vnam } else { @@ -97,10 +105,10 @@ parseformula <- function(form, data, noexpand=FALSE) { fnam <- vnam[[2]] x <- var[[xnam]] f <- var[[fnam]] - if(!is.factor(f) && !is.factor(x)) { - stop('interaction between ', xnam, ' 
and ', fnam, ', none of which are factors') + if (!is.factor(f) && !is.factor(x)) { + stop("interaction between ", xnam, " and ", fnam, ", none of which are factors") } - if(!is.factor(f) && is.factor(x)) { + if (!is.factor(f) && is.factor(x)) { tmp <- x x <- f f <- tmp @@ -108,152 +116,160 @@ parseformula <- function(form, data, noexpand=FALSE) { xnam <- fnam fnam <- tmp } - if(is.factor(x)) { - res <- list(structure(interaction(factor(f),factor(x),drop=TRUE),xnam=xnam,fnam=fnam)) + if (is.factor(x)) { + res <- list(structure(interaction(factor(f), factor(x), drop = TRUE), xnam = xnam, fnam = fnam)) } else { - res <- list(structure(factor(f),x=x,xnam=xnam, fnam=fnam)) + res <- list(structure(factor(f), x = x, xnam = xnam, fnam = fnam)) } - names(res) <- paste(xnam,fnam,sep=':') + names(res) <- paste(xnam, fnam, sep = ":") } res }) nm <- names(fl) - fl <- unlist(fl, recursive=FALSE) + fl <- unlist(fl, recursive = FALSE) names(fl) <- nm } else { fl <- NULL } - - if(len == 2) return(list(formula=opart,fl=fl,gpart=gpart,ivpart=~0,cpart=~0)) + + if (len == 2) { + return(list(formula = opart, fl = fl, gpart = gpart, ivpart = ~0, cpart = ~0)) + } # Then the iv-part - ivparts <- formula(f,lhs=0,rhs=3, drop=TRUE) - if(!nopart(ivparts) && length(ivparts[[2]])>1 && ivparts[[2]][[1]]=='(') { - # Now, make a list of the iv-formulas where we split the lhs in each - # to obtain q ~ x3+x4, w ~x3+x4 - ivspec <- as.Formula(ivparts[[2]][[2]]) # it's now q|w ~ x3+x4 - lhs <- formula(ivspec,rhs=0) - ivpart <- lapply(seq_along(all.vars(lhs)),function(i) formula(ivspec,lhs=i)) + ivparts <- formula(f, lhs = 0, rhs = 3, drop = TRUE) + if (!nopart(ivparts) && length(ivparts[[2]]) > 1 && ivparts[[2]][[1]] == "(") { + # Now, make a list of the iv-formulas where we split the lhs in each + # to obtain q ~ x3+x4, w ~x3+x4 + ivspec <- as.Formula(ivparts[[2]][[2]]) # it's now q|w ~ x3+x4 + lhs <- formula(ivspec, rhs = 0) + ivpart <- lapply(seq_along(all.vars(lhs)), function(i) formula(ivspec, lhs 
= i)) } else { - ivpart <- NULL + ivpart <- NULL + } + + if (len == 3 && !is.null(ivpart)) { + return(list(formula = opart, fl = fl, iv = ivpart, gpart = gpart, ivpart = ivparts, cpart = ~0)) } - if(len == 3 && !is.null(ivpart)) return(list(formula=opart,fl=fl,iv=ivpart,gpart=gpart,ivpart=ivparts,cpart=~0)) - # The cluster part, this could be the third part if there are no parentheses - if(len == 3 && is.null(ivpart)) { + if (len == 3 && is.null(ivpart)) { cpart <- ivparts ivparts <- NULL } else { - cpart <- formula(f,lhs=0,rhs=4,drop=TRUE) + cpart <- formula(f, lhs = 0, rhs = 4, drop = TRUE) } - if(!nopart(cpart)) { - # handle the same way as the factors, but without the covariate interaction - tm <- terms(cpart, keep.order=TRUE) - nm <- parts <- attr(tm,'term.labels') - clist <- lapply(paste('factor(',parts,')',sep=''),function(e) parse(text=e)) - cluster <- lapply(clist,eval,data) - names(cluster) <- nm - + if (!nopart(cpart)) { + # handle the same way as the factors, but without the covariate interaction + tm <- terms(cpart, keep.order = TRUE) + nm <- parts <- attr(tm, "term.labels") + clist <- lapply(paste("factor(", parts, ")", sep = ""), function(e) parse(text = e)) + cluster <- lapply(clist, eval, data) + names(cluster) <- nm } else { - cluster <- NULL + cluster <- NULL } - list(formula=opart,fl=fl,iv=ivpart,cluster=cluster,gpart=gpart,ivpart=ivparts,cpart=cpart) + list(formula = opart, fl = fl, iv = ivpart, cluster = cluster, gpart = gpart, ivpart = ivparts, cpart = cpart) } # ivresid is optional, used in 2. 
stage of 2sls to pass # the difference between the original endogenous variable and the prediction # for the purpose of computing sum of square residuals -doprojols <- function(psys, ivresid=NULL, exactDOF=FALSE, keepX=FALSE, nostats=FALSE) { - if(is.numeric(exactDOF)) { +doprojols <- function(psys, ivresid = NULL, exactDOF = FALSE, keepX = FALSE, nostats = FALSE) { + if (is.numeric(exactDOF)) { df <- exactDOF totvar <- length(psys$y) - df } else { # numrefs is also used later - numrefs <- nrefs(psys$fl, compfactor(psys$fl), exactDOF) - totvar <- totalpvar(psys$fl)-numrefs - df <- length(psys$y)-totvar + numrefs <- nrefs(psys$fl, compfactor(psys$fl), exactDOF) + totvar <- totalpvar(psys$fl) - numrefs + df <- length(psys$y) - totvar } - if(is.null(psys$yxz$x)) { + if (is.null(psys$yxz$x)) { # No covariates - z <- list(N=psys$N, r.residuals=psys$y,fe=psys$fl,p=totvar,Pp=0,cfactor=compfactor(psys$fl), - na.action=psys$na.action, contrasts=psys$contrasts, - fitted.values=psys$y - psys$yxz$y, - coefficients=matrix(double(0),psys$N,0), - df=df, - nostats=FALSE, - model.assign=psys$assign, - model.labels=psys$model.labels, - residuals=psys$yxz$y,clustervar=psys$clustervar, call=match.call()) - z$df.residual <- z$df - class(z) <- 'felm' - return(z) - } + z <- list( + N = psys$N, r.residuals = psys$y, fe = psys$fl, p = totvar, Pp = 0, cfactor = compfactor(psys$fl), + na.action = psys$na.action, contrasts = psys$contrasts, + fitted.values = psys$y - psys$yxz$y, + coefficients = matrix(double(0), psys$N, 0), + df = df, + nostats = FALSE, + model.assign = psys$assign, + model.labels = psys$model.labels, + residuals = psys$yxz$y, clustervar = psys$clustervar, call = match.call() + ) + z$df.residual <- z$df + class(z) <- "felm" + return(z) + } yz <- psys$yxz$y xz <- psys$yxz$x y <- psys$y x <- psys$x fl <- psys$fl icpt <- psys$icpt -# here we just do an lm.fit, however lm.fit is quite slow since -# it doesn't use blas (in particular it can't use e.g. 
threaded blas in acml) -# so we have rolled our own. - -# we really don't return an 'lm' object or other similar stuff, so -# we should consider using more elementary operations which map to blas-3 -# eg. solve(crossprod(xz),t(xz) %*% yz) -# Or, even invert by solve(crossprod(xz)) since we need -# the diagonal for standard errors. We could use the cholesky inversion -# chol2inv(chol(crossprod(xz))) + # here we just do an lm.fit, however lm.fit is quite slow since + # it doesn't use blas (in particular it can't use e.g. threaded blas in acml) + # so we have rolled our own. + + # we really don't return an 'lm' object or other similar stuff, so + # we should consider using more elementary operations which map to blas-3 + # eg. solve(crossprod(xz),t(xz) %*% yz) + # Or, even invert by solve(crossprod(xz)) since we need + # the diagonal for standard errors. We could use the cholesky inversion + # chol2inv(chol(crossprod(xz))) cp <- crossprod(xz) - b <- crossprod(xz,yz) + b <- crossprod(xz, yz) ch <- cholx(cp) - # ch <- chol(cp) - # beta <- drop(inv %*% (t(xz) %*% yz)) - # remove multicollinearities - badvars <- attr(ch,'badvars') - - if(is.null(badvars)) { - beta <- backsolve(ch,backsolve(ch,b,transpose=TRUE)) -# beta <- as.vector(beta) -# beta <- as.vector(backsolve(ch,backsolve(ch,b,transpose=TRUE))) - if(!nostats) inv <- chol2inv(ch) + # ch <- chol(cp) + # beta <- drop(inv %*% (t(xz) %*% yz)) + # remove multicollinearities + badvars <- attr(ch, "badvars") + + if (is.null(badvars)) { + beta <- backsolve(ch, backsolve(ch, b, transpose = TRUE)) + # beta <- as.vector(beta) + # beta <- as.vector(backsolve(ch,backsolve(ch,b,transpose=TRUE))) + if (!nostats) inv <- chol2inv(ch) } else { beta <- matrix(NaN, nrow(cp), ncol(b)) -# beta <- rep(NaN,nrow(cp)) - beta[-badvars,] <- backsolve(ch,backsolve(ch,b[-badvars,], transpose=TRUE)) - if(!nostats) { - inv <- matrix(NA,nrow(cp),ncol(cp)) - inv[-badvars,-badvars] <- chol2inv(ch) + # beta <- rep(NaN,nrow(cp)) + beta[-badvars, ] <- 
backsolve(ch, backsolve(ch, b[-badvars, ], transpose = TRUE)) + if (!nostats) { + inv <- matrix(NA, nrow(cp), ncol(cp)) + inv[-badvars, -badvars] <- chol2inv(ch) } } rm(ch, b, cp) - - if(length(fl) > 0 && icpt > 0) - rownames(beta) <- colnames(x)[-icpt] else rownames(beta) <- colnames(x) + + if (length(fl) > 0 && icpt > 0) { + rownames(beta) <- colnames(x)[-icpt] + } else { + rownames(beta) <- colnames(x) + } colnames(beta) <- colnames(y) - if(ncol(beta) == 1) names(beta) <- rownames(beta) - - z <- list(coefficients=beta,badconv=psys$badconv,Pp=ncol(xz)) + if (ncol(beta) == 1) names(beta) <- rownames(beta) + + z <- list(coefficients = beta, badconv = psys$badconv, Pp = ncol(xz)) z$N <- nrow(xz) z$p <- ncol(xz) - length(badvars) - if(!nostats) { + if (!nostats) { z$inv <- inv inv <- nazero(inv) } -# how well would we fit with all the dummies? -# the residuals of the centered model equals the residuals -# of the full model, thus we may compute the fitted values -# resulting from the full model. + # how well would we fit with all the dummies? + # the residuals of the centered model equals the residuals + # of the full model, thus we may compute the fitted values + # resulting from the full model. -# for the 2. step in the 2sls, we should replace -# the instrumented variable with the real ones (the difference is in ivresid) -# when predicting, but only for the purpose of computing -# residuals. + # for the 2. step in the 2sls, we should replace + # the instrumented variable with the real ones (the difference is in ivresid) + # when predicting, but only for the purpose of computing + # residuals. 
nabeta <- nazero(beta) @@ -266,18 +282,18 @@ doprojols <- function(psys, ivresid=NULL, exactDOF=FALSE, keepX=FALSE, nostats=F z$contrasts <- psys$contrasts z$model.assign <- psys$model.assign z$model.labels <- psys$model.labels - if(length(fl) > 0) { - # insert a zero at the intercept position (x may have an intercept, whereas xz has not) -# if(icpt > 0) ibeta <- append(beta,0,after=icpt-1) else ibeta <- beta - if(icpt > 0) { - pre <- seq_len(icpt-1) + if (length(fl) > 0) { + # insert a zero at the intercept position (x may have an intercept, whereas xz has not) + # if(icpt > 0) ibeta <- append(beta,0,after=icpt-1) else ibeta <- beta + if (icpt > 0) { + pre <- seq_len(icpt - 1) post <- setdiff(seq_len(nrow(beta)), pre) - ibeta <- rbind(beta[pre,,drop=FALSE], 0, beta[post,,drop=FALSE]) + ibeta <- rbind(beta[pre, , drop = FALSE], 0, beta[post, , drop = FALSE]) } else { ibeta <- beta } - pred <- x %*% ifelse(is.na(ibeta),0,ibeta) + pred <- x %*% ifelse(is.na(ibeta), 0, ibeta) z$r.residuals <- y - pred } else { z$r.residuals <- zresid @@ -291,23 +307,23 @@ doprojols <- function(psys, ivresid=NULL, exactDOF=FALSE, keepX=FALSE, nostats=F # the difference are the ivresid, which we must multiply by beta and subtract. # the residuals from the 2nd stage are in iv.residuals # hmm, what about the r.residuals? We modify them as well. They are used in kaczmarz(). 
- if(!is.null(ivresid)) { - if(!is.matrix(ivresid)) { + if (!is.null(ivresid)) { + if (!is.matrix(ivresid)) { nm <- names(ivresid) - ivresid <- matrix(unlist(ivresid),z$N) + ivresid <- matrix(unlist(ivresid), z$N) colnames(ivresid) <- nm } - z$ivresid <- ivresid %*% nabeta[colnames(ivresid),,drop=FALSE] + z$ivresid <- ivresid %*% nabeta[colnames(ivresid), , drop = FALSE] z$iv.residuals <- z$residuals - z$residuals <- z$residuals - z$ivresid + z$residuals <- z$residuals - z$ivresid z$r.iv.residuals <- z$r.residuals z$r.residuals <- z$r.residuals - z$ivresid } - + z$terms <- psys$terms z$cfactor <- compfactor(fl) totlev <- totalpvar(fl) - if(is.numeric(exactDOF)) { + if (is.numeric(exactDOF)) { z$df <- exactDOF numdum <- z$N - z$p - z$df z$numrefs <- totlev - numdum @@ -318,48 +334,48 @@ doprojols <- function(psys, ivresid=NULL, exactDOF=FALSE, keepX=FALSE, nostats=F } z$df.residual <- z$df z$rank <- z$N - z$df - + z$exactDOF <- exactDOF z$fe <- fl -# should we subtract 1 for an intercept? -# a similar adjustment is done in summary.felm when computing rdf - z$p <- z$p + numdum - 1 + # should we subtract 1 for an intercept? + # a similar adjustment is done in summary.felm when computing rdf + z$p <- z$p + numdum - 1 z$xp <- z$p z$na.action <- psys$na.action - class(z) <- 'felm' + class(z) <- "felm" cluster <- psys$clustervar z$clustervar <- cluster - if(nostats) { + if (nostats) { z$nostats <- TRUE return(z) } z$nostats <- FALSE -# then we go about creating the covariance matrices and tests -# if there is a single lhs, they are just stored as matrices etc -# in z. If there are multiple lhs, these quantities are inserted -# in a list z$STATS indexed by z$lhs -# indexed by the name of the lhs + # then we go about creating the covariance matrices and tests + # if there is a single lhs, they are just stored as matrices etc + # in z. 
If there are multiple lhs, these quantities are inserted + # in a list z$STATS indexed by z$lhs + # indexed by the name of the lhs vcvnames <- list(rownames(beta), rownames(beta)) Ncoef <- nrow(beta) singlelhs <- length(z$lhs) == 1 - if(!singlelhs) z$STATS <- list() + if (!singlelhs) z$STATS <- list() - for(lhs in z$lhs) { - res <- z$residuals[,lhs] + for (lhs in z$lhs) { + res <- z$residuals[, lhs] -# if(!is.null(ivresid)) res <- res - z$ivresid + # if(!is.null(ivresid)) res <- res - z$ivresid - vcvfactor <- sum(res**2)/z$df + vcvfactor <- sum(res**2) / z$df -# when multiple lhs, vcvfactor is a vector -# we need a list of vcvs in this case + # when multiple lhs, vcvfactor is a vector + # we need a list of vcvs in this case - if(singlelhs) { + if (singlelhs) { z$vcv <- z$inv * vcvfactor setdimnames(z$vcv, vcvnames) } else { @@ -368,28 +384,28 @@ doprojols <- function(psys, ivresid=NULL, exactDOF=FALSE, keepX=FALSE, nostats=F setdimnames(z$STATS[[lhs]]$vcv, vcvnames) } -# dimnames(z$vcv) <- list(names(beta),names(beta)) - - # We should make the robust covariance matrix too. - # it's inv * sum (X_i' u_i u_i' X_i) * inv - # where u_i are the (full) residuals (Wooldridge, 10.5.4 (10.59)) - # i.e. inv * sum(u_i^2 X_i' X_i) * inv - # for large datasets the sum is probably best computed by a series of scaled - # rank k updates, i.e. the dsyrk blas routine, we make an R-version of it. - # need to check this computation, the SE's are slightly numerically different from Stata's. - # it seems stata does not do the small-sample adjustment - dfadj <- z$N/z$df - - # Now, here's an optimzation for very large xz. If we use the wcrossprod and ccrossprod - # functions, we can't get rid of xz, we end up with a copy of it which blows away memory. - # we need to scale xz with the residuals in xz, but we don't want to expand res to a full matrix, - # and even get a copy in the result. - # Thus we modify it in place with a .Call. 
The scaled variant is also used in the cluster computation. -# z$robustvcv <- dfadj * inv %*% wcrossprod(xz,res) %*% inv - - rscale <- ifelse(res==0,1e-40,res) + # dimnames(z$vcv) <- list(names(beta),names(beta)) + + # We should make the robust covariance matrix too. + # it's inv * sum (X_i' u_i u_i' X_i) * inv + # where u_i are the (full) residuals (Wooldridge, 10.5.4 (10.59)) + # i.e. inv * sum(u_i^2 X_i' X_i) * inv + # for large datasets the sum is probably best computed by a series of scaled + # rank k updates, i.e. the dsyrk blas routine, we make an R-version of it. + # need to check this computation, the SE's are slightly numerically different from Stata's. + # it seems stata does not do the small-sample adjustment + dfadj <- z$N / z$df + + # Now, here's an optimzation for very large xz. If we use the wcrossprod and ccrossprod + # functions, we can't get rid of xz, we end up with a copy of it which blows away memory. + # we need to scale xz with the residuals in xz, but we don't want to expand res to a full matrix, + # and even get a copy in the result. + # Thus we modify it in place with a .Call. The scaled variant is also used in the cluster computation. 
+ # z$robustvcv <- dfadj * inv %*% wcrossprod(xz,res) %*% inv + + rscale <- ifelse(res == 0, 1e-40, res) .Call(C_scalecols, xz, rscale) - if(singlelhs) { + if (singlelhs) { z$robustvcv <- dfadj * inv %*% crossprod(xz) %*% inv setdimnames(z$robustvcv, vcvnames) } else { @@ -398,26 +414,26 @@ doprojols <- function(psys, ivresid=NULL, exactDOF=FALSE, keepX=FALSE, nostats=F } - # then the clustered covariance matrix - if(!is.null(cluster)) { - method <- attr(cluster,'method') - if(is.null(method)) method <- 'cgm' - dfadj <- (z$N-1)/z$df + # then the clustered covariance matrix + if (!is.null(cluster)) { + method <- attr(cluster, "method") + if (is.null(method)) method <- "cgm" + dfadj <- (z$N - 1) / z$df d <- length(cluster) - if(method == 'cgm') { -# meat <- matrix(0,nrow(z$vcv),ncol(z$vcv)) - meat <- matrix(0,Ncoef,Ncoef) - for(i in 1:(2^d-1)) { + if (method == "cgm") { + # meat <- matrix(0,nrow(z$vcv),ncol(z$vcv)) + meat <- matrix(0, Ncoef, Ncoef) + for (i in 1:(2^d - 1)) { # Find out which ones to interact iac <- as.logical(intToBits(i))[1:d] # odd number is positive, even is negative - sgn <- 2*(sum(iac) %% 2) - 1 + sgn <- 2 * (sum(iac) %% 2) - 1 # interact the factors - ia <- factor(do.call(paste,c(cluster[iac],sep='\004'))) - adj <- sgn*dfadj*nlevels(ia)/(nlevels(ia)-1) - .Call(C_dsyrk,1,meat,adj,rowsum(xz,ia)) + ia <- factor(do.call(paste, c(cluster[iac], sep = "\004"))) + adj <- sgn * dfadj * nlevels(ia) / (nlevels(ia) - 1) + .Call(C_dsyrk, 1, meat, adj, rowsum(xz, ia)) } - if(singlelhs) { + if (singlelhs) { z$clustervcv <- inv %*% meat %*% inv setdimnames(z$clustervcv, vcvnames) } else { @@ -445,52 +461,51 @@ doprojols <- function(psys, ivresid=NULL, exactDOF=FALSE, keepX=FALSE, nostats=F ## z$clustervcv <- inv %*% meat %*% inv ## rm(meat) } else { - stop('unknown multi way cluster algorithm:',method) + stop("unknown multi way cluster algorithm:", method) } - - - if(singlelhs) { + + + if (singlelhs) { z$cse <- sqrt(diag(z$clustervcv)) - z$ctval <- 
coef(z)/z$cse - z$cpval <- 2*pt(abs(z$ctval),z$df,lower.tail=FALSE) + z$ctval <- coef(z) / z$cse + z$cpval <- 2 * pt(abs(z$ctval), z$df, lower.tail = FALSE) } else { z$STATS[[lhs]]$cse <- sqrt(diag(z$STATS[[lhs]]$clustervcv)) - z$STATS[[lhs]]$ctval <- z$coefficients[,lhs]/z$STATS[[lhs]]$cse - z$STATS[[lhs]]$cpval <- 2*pt(abs(z$STATS[[lhs]]$ctval),z$df,lower.tail=FALSE) + z$STATS[[lhs]]$ctval <- z$coefficients[, lhs] / z$STATS[[lhs]]$cse + z$STATS[[lhs]]$cpval <- 2 * pt(abs(z$STATS[[lhs]]$ctval), z$df, lower.tail = FALSE) } } - if(singlelhs) { + if (singlelhs) { z$se <- sqrt(diag(z$vcv)) - z$tval <- z$coefficients/z$se - z$pval <- 2*pt(abs(z$tval),z$df,lower.tail=FALSE) + z$tval <- z$coefficients / z$se + z$pval <- 2 * pt(abs(z$tval), z$df, lower.tail = FALSE) z$rse <- sqrt(diag(z$robustvcv)) - z$rtval <- coef(z)/z$rse - z$rpval <- 2*pt(abs(z$rtval),z$df,lower.tail=FALSE) + z$rtval <- coef(z) / z$rse + z$rpval <- 2 * pt(abs(z$rtval), z$df, lower.tail = FALSE) } else { z$STATS[[lhs]]$se <- sqrt(diag(z$STATS[[lhs]]$vcv)) - z$STATS[[lhs]]$tval <- z$coefficients[,lhs]/z$STATS[[lhs]]$se - z$STATS[[lhs]]$pval <- 2*pt(abs(z$STATS[[lhs]]$tval),z$df,lower.tail=FALSE) + z$STATS[[lhs]]$tval <- z$coefficients[, lhs] / z$STATS[[lhs]]$se + z$STATS[[lhs]]$pval <- 2 * pt(abs(z$STATS[[lhs]]$tval), z$df, lower.tail = FALSE) z$STATS[[lhs]]$rse <- sqrt(diag(z$STATS[[lhs]]$robustvcv)) - z$STATS[[lhs]]$rtval <- z$coefficients[,lhs]/z$STATS[[lhs]]$rse - z$STATS[[lhs]]$rpval <- 2*pt(abs(z$STATS[[lhs]]$rtval),z$df,lower.tail=FALSE) + z$STATS[[lhs]]$rtval <- z$coefficients[, lhs] / z$STATS[[lhs]]$rse + z$STATS[[lhs]]$rpval <- 2 * pt(abs(z$STATS[[lhs]]$rtval), z$df, lower.tail = FALSE) } # reset this for next lhs - if(!singlelhs) .Call(C_scalecols, xz, 1/rscale) + if (!singlelhs) .Call(C_scalecols, xz, 1 / rscale) } - + z } -project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) { - +project <- function(mf, fl, data, contrasts, clustervar = NULL, pf = parent.frame()) { m 
<- match(c("formula", "data", "subset", "na.action"), names(mf), 0L) mf <- mf[c(1L, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- quote(model.frame) - subspec <- mf[['subset']] + subspec <- mf[["subset"]] # we should handle multiple lhs # but how? model.frame() doesn't handle it, but we need @@ -498,91 +513,100 @@ project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) { # included. We create an artifical single lhs by summing the left hand # sides, just to get hold of the rhs. Then we extract the left hand side - Form <- as.Formula(mf[['formula']]) - mf[['formula']] <- Form + Form <- as.Formula(mf[["formula"]]) + mf[["formula"]] <- Form mf <- eval(mf, pf) - mt <- attr(mf,'terms') - naact <- attr(mf,'na.action') - if(!is.null(naact)) + mt <- attr(mf, "terms") + naact <- attr(mf, "na.action") + if (!is.null(naact)) { naclass <- class(naact) + } fullN <- nrow(mf) + length(naact) # then obtain the response matrix through Formula::model.part - response <- as.matrix(model.part(Form, mf, lhs=NULL,rhs=0)) + response <- as.matrix(model.part(Form, mf, lhs = NULL, rhs = 0)) - cmethod <- attr(clustervar,'method') - if(!is.null(clustervar)) { - if(is.character(clustervar)) clustervar <- as.list(clustervar) - if(!is.list(clustervar)) clustervar <- list(clustervar) - clustervar <- lapply(clustervar, function(cv) { - if(!is.character(cv)) factor(cv) else factor(data[,cv]) - }) + cmethod <- attr(clustervar, "method") + if (!is.null(clustervar)) { + if (is.character(clustervar)) clustervar <- as.list(clustervar) + if (!is.list(clustervar)) clustervar <- list(clustervar) + clustervar <- lapply(clustervar, function(cv) { + if (!is.character(cv)) factor(cv) else factor(data[, cv]) + }) } # we need to change clustervar and factor list to reflect # subsetting and na.action. na.action is ok, it's set as an attribute in mf # but subset must be done manually. 
It's done before na handling - if(!is.null(subspec)) { - subs <- eval(subspec,pf) - if(!is.null(clustervar)) clustervar <- lapply(clustervar,function(cv) cv[subs]) + if (!is.null(subspec)) { + subs <- eval(subspec, pf) + if (!is.null(clustervar)) clustervar <- lapply(clustervar, function(cv) cv[subs]) fl <- lapply(fl, function(fac) { f <- factor(fac[subs]) - x <- attr(f,'x') - if(is.null(x)) return(f) - structure(f,x=x[subs]) + x <- attr(f, "x") + if (is.null(x)) { + return(f) + } + structure(f, x = x[subs]) }) - } - if(!is.null(naact)) { - if(!is.null(clustervar)) clustervar <- lapply(clustervar, function(cv) cv[-naact]) - fl <- lapply(fl,function(fac) { + if (!is.null(naact)) { + if (!is.null(clustervar)) clustervar <- lapply(clustervar, function(cv) cv[-naact]) + fl <- lapply(fl, function(fac) { f <- factor(fac[-naact]) - x <- attr(f,'x') - if(is.null(x)) return(f) - structure(f,x=x[-naact]) + x <- attr(f, "x") + if (is.null(x)) { + return(f) + } + structure(f, x = x[-naact]) }) } - attr(clustervar,'method') <- cmethod + attr(clustervar, "method") <- cmethod -# ret <- list(fl=fl, na.action=naact,terms=mt,clustervar=clustervar, y=model.response(mf,'numeric')) - ret <- list(fl=fl, na.action=naact, fullN=fullN, terms=mt,clustervar=clustervar, y=response) - rm(mt,clustervar,naact) + # ret <- list(fl=fl, na.action=naact,terms=mt,clustervar=clustervar, y=model.response(mf,'numeric')) + ret <- list(fl = fl, na.action = naact, fullN = fullN, terms = mt, clustervar = clustervar, y = response) + rm(mt, clustervar, naact) - lapply(ret$clustervar, function(f) - if(length(f) != nrow(ret$y)) stop('cluster factors are not the same length as data ', - length(f),'!=',nrow(ret$y))) + lapply(ret$clustervar, function(f) { + if (length(f) != nrow(ret$y)) { + stop( + "cluster factors are not the same length as data ", + length(f), "!=", nrow(ret$y) + ) + } + }) # in case of cluster factor specified with the clustervar argument: -# try a sparse model matrix to save memory when 
removing intercept -# though, demeanlist must be full. Ah, no, not much to save because -# it won't be sparse after centering -# we should rather let demeanlist remove the intercept, this -# will save memory by not copying. But we need to remove it below in x %*% beta -# (or should we extend beta with a zero at the right place, it's only -# a vector, eh, is it, do we not allow matrix lhs? No.) + # try a sparse model matrix to save memory when removing intercept + # though, demeanlist must be full. Ah, no, not much to save because + # it won't be sparse after centering + # we should rather let demeanlist remove the intercept, this + # will save memory by not copying. But we need to remove it below in x %*% beta + # (or should we extend beta with a zero at the right place, it's only + # a vector, eh, is it, do we not allow matrix lhs? No.) -# we make some effort to avoid copying the data matrix below -# this includes assigning to lists in steps, with gc() here and there. -# It's done for R 3.0.2. The copy semantics could be changed in later versions. + # we make some effort to avoid copying the data matrix below + # this includes assigning to lists in steps, with gc() here and there. + # It's done for R 3.0.2. The copy semantics could be changed in later versions. 
-# ret$x <- model.matrix(ret$terms,mf,contrasts) + # ret$x <- model.matrix(ret$terms,mf,contrasts) ret$x <- model.matrix(Form, mf, contrasts) rm(mf) - ret$contrasts <- attr(ret$x,'contrasts') - ret$model.assign <- attr(ret$x, 'assign') - ret$model.labels <- attr(terms(Form[-2]), 'term.labels') # ditch lhs when finding terms - icpt <- attr(ret$x,'assign') == 0 - if(!any(icpt)) icpt <- 0 else icpt <- which(icpt) + ret$contrasts <- attr(ret$x, "contrasts") + ret$model.assign <- attr(ret$x, "assign") + ret$model.labels <- attr(terms(Form[-2]), "term.labels") # ditch lhs when finding terms + icpt <- attr(ret$x, "assign") == 0 + if (!any(icpt)) icpt <- 0 else icpt <- which(icpt) ret$icpt <- icpt ncov <- ncol(ret$x) - (icpt > 0) - if(ncov == 0) { + if (ncov == 0) { ret$x <- NULL - ret$yxz <- list(y=demeanlist(ret$y,fl)) + ret$yxz <- list(y = demeanlist(ret$y, fl)) ret$Pp <- 0 ret$N <- length(ret$y) - ret$yx <- list(y=ret$y) + ret$yx <- list(y = ret$y) return(ret) } @@ -591,87 +615,87 @@ project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) { # hmm, the list() copies the stuff. How can we avoid a copy # and still enable parallelization over y and x in demeanlist? A vararg demeanlist? # I.e. an .External version? Yes. -# yx <- list(y=ret$y, x=ret$x) -# gc() -# ret$yxz <- demeanlist(yx,fl,icpt) -# rm(fl,yx); gc() - -# ret$yxz <- edemeanlist(y=ret$y,x=ret$x,fl=fl,icpt=c(0,icpt)) - ret$yxz <- demeanlist(list(y=ret$y,x=ret$x),fl=fl,icpt=c(0,icpt)) - ret$badconv <- attr(ret$yxz$x,'badconv') + attr(ret$yxz$y,'badconv') + # yx <- list(y=ret$y, x=ret$x) + # gc() + # ret$yxz <- demeanlist(yx,fl,icpt) + # rm(fl,yx); gc() + + # ret$yxz <- edemeanlist(y=ret$y,x=ret$x,fl=fl,icpt=c(0,icpt)) + ret$yxz <- demeanlist(list(y = ret$y, x = ret$x), fl = fl, icpt = c(0, icpt)) + ret$badconv <- attr(ret$yxz$x, "badconv") + attr(ret$yxz$y, "badconv") # use our homebrewn setdimnames instead of colnames. colnames copies. 
- if(length(fl) > 0) { - if(icpt == 0) - setdimnames(ret$yxz$x, list(NULL,colnames(ret$x))) - else - setdimnames(ret$yxz$x, list(NULL,colnames(ret$x)[-icpt])) + if (length(fl) > 0) { + if (icpt == 0) { + setdimnames(ret$yxz$x, list(NULL, colnames(ret$x))) + } else { + setdimnames(ret$yxz$x, list(NULL, colnames(ret$x)[-icpt])) + } } ret } #' @export -..oldfelm <- function(formula, data, exactDOF=FALSE, subset, na.action, contrasts=NULL,...) { - - knownargs <- c('iv', 'clustervar', 'cmethod', 'keepX', 'nostats') +..oldfelm <- function(formula, data, exactDOF = FALSE, subset, na.action, contrasts = NULL, ...) { + knownargs <- c("iv", "clustervar", "cmethod", "keepX", "nostats") keepX <- FALSE - cmethod <- 'cgm' + cmethod <- "cgm" iv <- NULL clustervar <- NULL nostats <- FALSE - deprec <- c('iv', 'clustervar') + deprec <- c("iv", "clustervar") -# sc <- names(sys.call())[-1] -# named <- knownargs[pmatch(sc,knownargs)] -# for(arg in c('iv', 'clustervar')) { -# if(!is.null(eval(as.name(arg))) && !(arg %in% named)) { -# warning("Please specify the '",arg,"' argument by name, or use a multi part formula. Its position in the argument list will change in a later version") -# } -# } + # sc <- names(sys.call())[-1] + # named <- knownargs[pmatch(sc,knownargs)] + # for(arg in c('iv', 'clustervar')) { + # if(!is.null(eval(as.name(arg))) && !(arg %in% named)) { + # warning("Please specify the '",arg,"' argument by name, or use a multi part formula. Its position in the argument list will change in a later version") + # } + # } mf <- match.call(expand.dots = TRUE) # Currently there shouldn't be any ... arguments # check that the list is empty -# if(length(mf[['...']]) > 0) stop('unknown argument ',mf['...']) - + # if(length(mf[['...']]) > 0) stop('unknown argument ',mf['...']) + # When moved to the ... list, we use this: # we do it right away, iv and clustervar can't possibly end up in ... yet, not with normal users args <- list(...) 
- ka <- knownargs[pmatch(names(args),knownargs, duplicates.ok=FALSE)] + ka <- knownargs[pmatch(names(args), knownargs, duplicates.ok = FALSE)] names(args)[!is.na(ka)] <- ka[!is.na(ka)] dpr <- deprec[match(ka, deprec)] - if(any(!is.na(dpr))) { + if (any(!is.na(dpr))) { bad <- dpr[which(!is.na(dpr))] - warning('Argument(s) ',paste(bad,collapse=','), ' are deprecated and will be removed, use multipart formula instead') + warning("Argument(s) ", paste(bad, collapse = ","), " are deprecated and will be removed, use multipart formula instead") } env <- environment() - lapply(intersect(knownargs,ka), function(arg) assign(arg,args[[arg]], pos=env)) - if(!(cmethod %in% c('cgm','gaure'))) stop('Unknown cmethod: ',cmethod) + lapply(intersect(knownargs, ka), function(arg) assign(arg, args[[arg]], pos = env)) + if (!(cmethod %in% c("cgm", "gaure"))) stop("Unknown cmethod: ", cmethod) # also implement a check for unknown arguments unk <- setdiff(names(args), knownargs) - if(length(unk) > 0) stop('unknown arguments ',paste(unk, collapse=' ')) + if (length(unk) > 0) stop("unknown arguments ", paste(unk, collapse = " ")) - if(missing(data)) mf$data <- data <- environment(formula) + if (missing(data)) mf$data <- data <- environment(formula) pf <- parent.frame() - pform <- parseformula(formula,data) - - if(!is.null(iv) && !is.null(pform[['iv']])) stop("Specify EITHER iv argument(deprecated) OR multipart terms, not both") - if(!is.null(pform[['cluster']]) && !is.null(clustervar)) stop("Specify EITHER clustervar(deprecated) OR multipart terms, not both") - if(!is.null(pform[['cluster']])) clustervar <- structure(pform[['cluster']], method=cmethod) - if(is.null(iv) && is.null(pform[['iv']])) { + pform <- parseformula(formula, data) + + if (!is.null(iv) && !is.null(pform[["iv"]])) stop("Specify EITHER iv argument(deprecated) OR multipart terms, not both") + if (!is.null(pform[["cluster"]]) && !is.null(clustervar)) stop("Specify EITHER clustervar(deprecated) OR multipart terms, not both") 
+ if (!is.null(pform[["cluster"]])) clustervar <- structure(pform[["cluster"]], method = cmethod) + if (is.null(iv) && is.null(pform[["iv"]])) { # no iv, just do the thing - fl <- pform[['fl']] - formula <- pform[['formula']] - mf[['formula']] <- formula - psys <- project(mf,fl,data,contrasts,clustervar,pf) - z <- doprojols(psys,exactDOF=exactDOF, nostats=nostats[1]) - if(keepX) z$X <- if(psys$icpt > 0) psys$x[,-psys$icpt] else psys$x + fl <- pform[["fl"]] + formula <- pform[["formula"]] + mf[["formula"]] <- formula + psys <- project(mf, fl, data, contrasts, clustervar, pf) + z <- doprojols(psys, exactDOF = exactDOF, nostats = nostats[1]) + if (keepX) z$X <- if (psys$icpt > 0) psys$x[, -psys$icpt] else psys$x rm(psys) z$parent.frame <- pf @@ -680,79 +704,87 @@ project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) { } # IV. Clean up formulas, set up for 1st stages - if(!is.null(iv)) { + if (!is.null(iv)) { # warning("argument iv is deprecated, use multipart formula instead") - if(!is.list(iv)) iv <- list(iv) - form <- pform[['formula']] + if (!is.list(iv)) iv <- list(iv) + form <- pform[["formula"]] # Old syntax, the IV-variables are also in the main equation, remove them - for(ivv in iv) { + for (ivv in iv) { ivnam <- ivv[[2]] # create the new formula by removing the IV lhs. - form <- update(form, substitute(. ~ . - Z,list(Z=ivnam))) + form <- update(form, substitute(. ~ . 
- Z, list(Z = ivnam))) } - pform[['formula']] <- form - mf[['iv']] <- NULL + pform[["formula"]] <- form + mf[["iv"]] <- NULL ivpart <- NULL } else { - iv <- pform[['iv']] - ivpart <- as.Formula(pform[['ivpart']]) + iv <- pform[["iv"]] + ivpart <- as.Formula(pform[["ivpart"]]) } - if(is.environment(data)) { - ivenv <- new.env(parent=data) + if (is.environment(data)) { + ivenv <- new.env(parent = data) } else { - ivenv <- new.env(parent=pf) + ivenv <- new.env(parent = pf) } - if(!is.null(ivpart)) { + if (!is.null(ivpart)) { # ivpart is something like ~(P|Q ~ x+x2) # strip ~ and () ivpart <- as.Formula(ivpart[[2]][[2]]) - lhs <- formula(ivpart, lhs=NULL, rhs=0) - rhs <- as.Formula(formula(ivpart, lhs=0, rhs=1)) - if(length(ivpart)[[2]] > 1) { + lhs <- formula(ivpart, lhs = NULL, rhs = 0) + rhs <- as.Formula(formula(ivpart, lhs = 0, rhs = 1)) + if (length(ivpart)[[2]] > 1) { stop("Instruments can't be projected out: ", ivpart) - rhsg <- as.Formula(formula(ivpart, lhs=0, rhs=2)) + rhsg <- as.Formula(formula(ivpart, lhs = 0, rhs = 2)) } else { - rhsg <- list(NULL,NULL) + rhsg <- list(NULL, NULL) } Form <- as.Formula(formula) - if(length(Form)[2] == 4) { - cluform <- formula(Form,lhs=0,rhs=4)[[2]] - step1form <- as.Formula(substitute(L ~ B + ivO | G|0|C, - list(L=lhs[[2]], B=pform[['formula']][[3]], - ivO=rhs[[2]], - G=pform[['gpart']][[2]], - C=cluform))) - + if (length(Form)[2] == 4) { + cluform <- formula(Form, lhs = 0, rhs = 4)[[2]] + step1form <- as.Formula(substitute( + L ~ B + ivO | G | 0 | C, + list( + L = lhs[[2]], B = pform[["formula"]][[3]], + ivO = rhs[[2]], + G = pform[["gpart"]][[2]], + C = cluform + ) + )) } else { - step1form <- as.Formula(substitute(L ~ B + ivO | G, - list(L=lhs[[2]], B=pform[['formula']][[3]], - ivO=rhs[[2]], - G=pform[['gpart']][[2]]))) + step1form <- as.Formula(substitute( + L ~ B + ivO | G, + list( + L = lhs[[2]], B = pform[["formula"]][[3]], + ivO = rhs[[2]], + G = pform[["gpart"]][[2]] + ) + )) } environment(step1form) <- 
environment(formula) - ivform <- parseformula(step1form,data) - fl <- ivform[['fl']] - mf[['formula']] <- ivform[['formula']] - environment(mf[['formula']]) <- environment(formula) - psys <- project(mf,fl,data,contrasts,clustervar,pf) + ivform <- parseformula(step1form, data) + fl <- ivform[["fl"]] + mf[["formula"]] <- ivform[["formula"]] + environment(mf[["formula"]]) <- environment(formula) + psys <- project(mf, fl, data, contrasts, clustervar, pf) - if(length(nostats) == 2) - nost <- nostats[2] - else - nost <- nostats[1] - z1 <- doprojols(psys, exactDOF=exactDOF, nostats=nost) + if (length(nostats) == 2) { + nost <- nostats[2] + } else { + nost <- nostats[1] + } + z1 <- doprojols(psys, exactDOF = exactDOF, nostats = nost) # put the fitted values in ivenv - for(n in colnames(z1$fitted.values)) { - nn <- paste(n,'(fit)',sep='') - assign(nn,z1$fitted.values[,n],envir=ivenv) + for (n in colnames(z1$fitted.values)) { + nn <- paste(n, "(fit)", sep = "") + assign(nn, z1$fitted.values[, n], envir = ivenv) } - z1$endogvars <- paste('`',colnames(z1$fitted.values),'(fit)`',sep='') + z1$endogvars <- paste("`", colnames(z1$fitted.values), "(fit)`", sep = "") # we should not use all.vars(rhs), to find the instruments, but # pick up things from z1 somehow, in case there are expanded @@ -762,7 +794,7 @@ project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) { cname <- rownames(z1$coefficients) ivnam <- all.vars(rhs) asgn <- z1$model.assign - if(length(asgn) != length(cname)) asgn <- asgn[asgn!=0] + if (length(asgn) != length(cname)) asgn <- asgn[asgn != 0] # now assume there's a factor f among the instruments, with levels f2-f4 (1 is a reference) # we look up 'f' in lab, it has position 5, then everything with assign == 5 belong to this factor ivars <- cname[asgn %in% which(z1$model.labels %in% ivnam)] @@ -770,83 +802,90 @@ project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) { # Store any dummy instruments in the ivenv, for possible 
later use in condfstat # There's a psys$x which is the model matrix # we may as well store all the instruments there - for(ivar in ivars) { - if(!(ivar %in% ivnam)) - assign(ivar,psys$x[,ivar],envir=ivenv) + for (ivar in ivars) { + if (!(ivar %in% ivnam)) { + assign(ivar, psys$x[, ivar], envir = ivenv) + } } - if(!nost) { - z1$iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1,ivars, lhs=lh)) + if (!nost) { + z1$iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1, ivars, lhs = lh)) names(z1$iv1fstat) <- z1$lhs - z1$rob.iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1,ivars,type='robust', lhs=lh)) + z1$rob.iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1, ivars, type = "robust", lhs = lh)) names(z1$rob.iv1fstat) <- z1$lhs } z1$call <- match.call() environment(step1form) <- environment(formula) - z1$call[['formula']] <- step1form + z1$call[["formula"]] <- step1form naact <- psys$na.action - FIT <- as.formula(paste('~',paste(z1$endogvars,sep='', collapse='+'),sep='')) - step2form <- as.Formula(substitute(y ~ B + FIT | G, - list(y=pform[['formula']][[2]], - B=pform[['formula']][[3]], - FIT=FIT[[2]], - G=pform[['gpart']][[2]]))) + FIT <- as.formula(paste("~", paste(z1$endogvars, sep = "", collapse = "+"), sep = "")) + step2form <- as.Formula(substitute( + y ~ B + FIT | G, + list( + y = pform[["formula"]][[2]], + B = pform[["formula"]][[3]], + FIT = FIT[[2]], + G = pform[["gpart"]][[2]] + ) + )) # do the first step environment(step2form) <- environment(formula) form2 <- parseformula(step2form, data) - fl <- form2[['fl']] - formula <- form2[['formula']] + fl <- form2[["fl"]] + formula <- form2[["formula"]] environment(formula) <- ivenv mf$formula <- formula - if(is.environment(mf$data)) mf$data <- ivenv + if (is.environment(mf$data)) mf$data <- ivenv # remove the naact from 1st step # any missings in the exogenous variables will be missing in both the 1st and 2nd stage # If there are missing instruments or endogenous variables they will be missing in 1st stage, but 
not in the 2nd # so we must make them missing in the 2nd stage as well. We implement that as a subset. - if(!is.null(naact)) { + if (!is.null(naact)) { # naact is a vector of observations to remove # numbered after a subset has been taken. # if there's no subset in mf, we create one - subset <- mf[['subset']] - if(is.null(subset)) { - mf[['subset']] <- -naact + subset <- mf[["subset"]] + if (is.null(subset)) { + mf[["subset"]] <- -naact } else { # subset is an indexing vector for the rows in the data frame # naact specifies rows to remove after subsetting. # we simulate this process. Let's make an integer vector for the # rows of the data. The total length can be found - mf[['subset']] <- seq_len(psys$fullN)[subset][-naact] + mf[["subset"]] <- seq_len(psys$fullN)[subset][-naact] } } - psys <- project(mf=mf,fl=fl,data=data,contrasts=contrasts,clustervar=clustervar,pf=pf) + psys <- project(mf = mf, fl = fl, data = data, contrasts = contrasts, clustervar = clustervar, pf = pf) ivres <- z1$residuals - colnames(ivres) <- as.character(sapply(all.vars(FIT),as.name)) - z <- doprojols(psys, ivresid=ivres, exactDOF=exactDOF, nostats=nostats) + colnames(ivres) <- as.character(sapply(all.vars(FIT), as.name)) + z <- doprojols(psys, ivresid = ivres, exactDOF = exactDOF, nostats = nostats) z$stage1 <- z1 z$st2call <- mf -# backwards compatibility + # backwards compatibility z$step1 <- lapply(z1$lhs, function(lh) { -# warning('Use stage1 instead of step1') + # warning('Use stage1 instead of step1') foo <- z1 foo$lhs <- lh - foo$beta <- z1$beta[,lh,drop=FALSE] + foo$beta <- z1$beta[, lh, drop = FALSE] foo$coefficients <- foo$beta - foo$response <- z1$response[,lh,drop=FALSE] - foo$fitted.values <- z1$fitted.values[,lh,drop=FALSE] - foo$residuals <- z1$residuals[,lh,drop=FALSE] - foo$r.residuals <- z1$r.residuals[,lh,drop=FALSE] - if(!is.null(z1$iv1fstat)) { + foo$response <- z1$response[, lh, drop = FALSE] + foo$fitted.values <- z1$fitted.values[, lh, drop = FALSE] + foo$residuals 
<- z1$residuals[, lh, drop = FALSE] + foo$r.residuals <- z1$r.residuals[, lh, drop = FALSE] + if (!is.null(z1$iv1fstat)) { foo$iv1fstat <- z1$iv1fstat[lh] foo$rob.iv1fstat <- z1$rob.iv1fstat[lh] } - if(!is.null(z1$STATS)) - foo[names(z1$STATS[[lh]])] <- z1$STATS[[lh]] - foo[['STATS']] <- NULL - foo}) + if (!is.null(z1$STATS)) { + foo[names(z1$STATS[[lh]])] <- z1$STATS[[lh]] + } + foo[["STATS"]] <- NULL + foo + }) z$endovars <- z1$endogvars z$parent.frame <- pf rm(psys) @@ -855,7 +894,7 @@ project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) { } # parse the IV-formulas, they may contain factor-parts - iv <- lapply(iv,parseformula,data) + iv <- lapply(iv, parseformula, data) # Now, insert the rhs of the IV in the formula # find the ordinary and the factor part @@ -863,16 +902,24 @@ project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) { # It may contain a factor part # this is the template for the step1 formula, just insert a left hand side - step1form <- formula(as.Formula(substitute(~B +ivO | G + ivG, - list(B=pform[['formula']][[3]],G=pform[['gpart']][[2]])))) + step1form <- formula(as.Formula(substitute( + ~ B + ivO | G + ivG, + list(B = pform[["formula"]][[3]], G = pform[["gpart"]][[2]]) + ))) # this is the template for the second stage, it is updated with the IV variables - step2form <- formula(as.Formula(substitute(y~B | G, - list(y=pform[['formula']][[2]],B=pform[['formula']][[3]], - G=pform[['gpart']][[2]])))) - - nullbase <- formula(as.Formula(substitute(~B | G, - list(B=pform[['formula']][[3]],G=pform[['gpart']][[2]])))) + step2form <- formula(as.Formula(substitute( + y ~ B | G, + list( + y = pform[["formula"]][[2]], B = pform[["formula"]][[3]], + G = pform[["gpart"]][[2]] + ) + ))) + + nullbase <- formula(as.Formula(substitute( + ~ B | G, + list(B = pform[["formula"]][[3]], G = pform[["gpart"]][[2]]) + ))) # we must do the 1. 
step for each instrumented variable # collect the instrumented variables and remove them from origform @@ -887,85 +934,87 @@ project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) { vars <- NULL step1 <- list() endolist <- c() - for(ivv in iv) { + for (ivv in iv) { # Now, make the full instrumental formula, i.e. with the rhs expanded with the # instruments, and the lhs equal to the instrumented variable - ivlhs <- ivv[['formula']][[2]] - rhsivo <- formula(as.Formula(ivv[['formula']]),lhs=0)[[2]] - if(nopart(ivv[['gpart']])) - rhsivg <- 0 - else - rhsivg <- formula(ivv[['gpart']],lhs=0,rhs=2)[[2]] - - fformula <- substitute(Z ~ R, list(Z=ivlhs, R=step1form[[2]])) - - fformula <- do.call(substitute, list(fformula,list(ivO=rhsivo,ivG=rhsivg))) - - ivform <- parseformula(fformula,data) - fl <- ivform[['fl']] - mf[['formula']] <- ivform[['formula']] - - # note that if there are no G() terms among the instrument variables, - # all the other covariates should only be centered once, not in every first stage and the - # second stage separately. We should rewrite and optimize for this. - psys <- project(mf,fl,data,contrasts,clustervar,pf) - z <- doprojols(psys, exactDOF=exactDOF) - mf[['formula']] <- fformula - z$call <- mf - rm(psys) - - - # now, we need an ftest between the first step with and without the instruments(null-model) - # We need the residuals with and without the - # instruments. 
We have them with the instruments, but must do another estimation - # for the null-model - mfnull <- mf - nullform <- substitute(Z ~ R,list(Z=ivlhs,R=nullbase[[2]])) - pformnull <- parseformula(nullform, data) - mfnull[['formula']] <- pformnull[['formula']] - znull <- doprojols(project(mfnull, pformnull[['fl']], data, contrasts, clustervar, pf), - exactDOF=exactDOF) - z$iv1fstat <- ftest(z,znull) - z$rob.iv1fstat <- ftest(z,znull,vcov=z$robustvcv) - if(!is.null(clustervar)) - z$clu.iv1fstat <- ftest(z,znull,vcov=z$clustervcv) - step1 <- c(step1,list(z)) - # then we lift the fitted variable and create a new name - ivz <- z - evar <- deparse(ivlhs) - new.var <- paste(evar,'(fit)',sep='') - # store them in an environment - assign(new.var,ivz$fitted.values,envir=ivenv) -# data[[new.var]] <- ivz$fitted - # save these, with the backtick for later use - vars <- c(vars,paste('`',new.var,'`',sep='')) - # keep the residuals, they are needed to reconstruct the residuals for the - # original variables in the 2. stage - ivarg[[paste('`',new.var,'`',sep='')]] <- ivz$residuals - # and add it to the equation - step2form <- update(as.Formula(step2form),as.formula(substitute(. ~ . + FIT | ., - list(FIT=as.name(new.var))))) + ivlhs <- ivv[["formula"]][[2]] + rhsivo <- formula(as.Formula(ivv[["formula"]]), lhs = 0)[[2]] + if (nopart(ivv[["gpart"]])) { + rhsivg <- 0 + } else { + rhsivg <- formula(ivv[["gpart"]], lhs = 0, rhs = 2)[[2]] + } + + fformula <- substitute(Z ~ R, list(Z = ivlhs, R = step1form[[2]])) + + fformula <- do.call(substitute, list(fformula, list(ivO = rhsivo, ivG = rhsivg))) + + ivform <- parseformula(fformula, data) + fl <- ivform[["fl"]] + mf[["formula"]] <- ivform[["formula"]] + + # note that if there are no G() terms among the instrument variables, + # all the other covariates should only be centered once, not in every first stage and the + # second stage separately. We should rewrite and optimize for this. 
+ psys <- project(mf, fl, data, contrasts, clustervar, pf) + z <- doprojols(psys, exactDOF = exactDOF) + mf[["formula"]] <- fformula + z$call <- mf + rm(psys) + + + # now, we need an ftest between the first step with and without the instruments(null-model) + # We need the residuals with and without the + # instruments. We have them with the instruments, but must do another estimation + # for the null-model + mfnull <- mf + nullform <- substitute(Z ~ R, list(Z = ivlhs, R = nullbase[[2]])) + pformnull <- parseformula(nullform, data) + mfnull[["formula"]] <- pformnull[["formula"]] + znull <- doprojols(project(mfnull, pformnull[["fl"]], data, contrasts, clustervar, pf), + exactDOF = exactDOF + ) + z$iv1fstat <- ftest(z, znull) + z$rob.iv1fstat <- ftest(z, znull, vcov = z$robustvcv) + if (!is.null(clustervar)) { + z$clu.iv1fstat <- ftest(z, znull, vcov = z$clustervcv) + } + step1 <- c(step1, list(z)) + # then we lift the fitted variable and create a new name + ivz <- z + evar <- deparse(ivlhs) + new.var <- paste(evar, "(fit)", sep = "") + # store them in an environment + assign(new.var, ivz$fitted.values, envir = ivenv) + # data[[new.var]] <- ivz$fitted + # save these, with the backtick for later use + vars <- c(vars, paste("`", new.var, "`", sep = "")) + # keep the residuals, they are needed to reconstruct the residuals for the + # original variables in the 2. stage + ivarg[[paste("`", new.var, "`", sep = "")]] <- ivz$residuals + # and add it to the equation + step2form <- update(as.Formula(step2form), as.formula(substitute( + . ~ . 
+ FIT | ., + list(FIT = as.name(new.var)) + ))) } names(step1) <- names(iv) # now we have a formula in step2form with all the iv-variables # it's just to project it - pform <- parseformula(step2form,data) - fl <- pform[['fl']] - formula <- pform[['formula']] + pform <- parseformula(step2form, data) + fl <- pform[["fl"]] + formula <- pform[["formula"]] environment(formula) <- ivenv mf$formula <- formula - if(is.environment(mf$data)) mf$data <- ivenv - psys <- project(mf=mf,fl=fl,data=data,contrasts=contrasts,clustervar=clustervar,pf=pf) + if (is.environment(mf$data)) mf$data <- ivenv + psys <- project(mf = mf, fl = fl, data = data, contrasts = contrasts, clustervar = clustervar, pf = pf) - z <- doprojols(psys,ivresid=ivarg,exactDOF=exactDOF) + z <- doprojols(psys, ivresid = ivarg, exactDOF = exactDOF) z$step1 <- step1 z$endovars <- vars rm(psys) z$call <- match.call() return(z) } - - - diff --git a/R/startup.R b/R/startup.R index b888d93..72176f5 100644 --- a/R/startup.R +++ b/R/startup.R @@ -4,55 +4,64 @@ setoption <- function(...) { arg <- list(...) 
# don't change an existing option - arg <- arg[sapply(paste('lfe',names(arg),sep='.'), function(n) is.null(getOption(n)))] + arg <- arg[sapply(paste("lfe", names(arg), sep = "."), function(n) is.null(getOption(n)))] # fill in from environment variable - if(length(arg) == 0) return() + if (length(arg) == 0) { + return() + } nm <- names(arg) arg <- lapply(nm, function(n) { - e <- Sys.getenv(paste('LFE',toupper(n),sep='_')) - if(e != '') { - val <- try(eval(parse(text=e))) - if(inherits(val, 'try-error')) val <- arg[[n]] + e <- Sys.getenv(paste("LFE", toupper(n), sep = "_")) + if (e != "") { + val <- try(eval(parse(text = e))) + if (inherits(val, "try-error")) val <- arg[[n]] val - } else arg[[n]] + } else { + arg[[n]] + } }) - names(arg) <- paste('lfe',nm,sep='.') + names(arg) <- paste("lfe", nm, sep = ".") do.call(options, arg) } -.onLoad <- function(libname,pkgname) { - setoption(usecg=FALSE, eps=1e-8, pint=1800L, accel=1L, bootmem=500, etol=c(1e-2,1e-12), - robust=FALSE) +.onLoad <- function(libname, pkgname) { + setoption( + usecg = FALSE, eps = 1e-8, pint = 1800L, accel = 1L, bootmem = 500, etol = c(1e-2, 1e-12), + robust = FALSE + ) - if(is.null(cr <- getOption('lfe.threads'))) { - cr <- as.integer(Sys.getenv('LFE_THREADS')) - if(is.na(cr)) cr <- as.integer(Sys.getenv('OMP_NUM_THREADS')) - if(is.na(cr)) cr <- as.integer(Sys.getenv('OMP_THREAD_LIMIT')) - if(is.na(cr)) cr <- as.integer(Sys.getenv('NUMBER_OF_PROCESSORS')) - if(is.na(cr)) cr <- parallel::detectCores(all.tests=TRUE) - if(is.na(cr) || cr < 1) { + if (is.null(cr <- getOption("lfe.threads"))) { + cr <- as.integer(Sys.getenv("LFE_THREADS")) + if (is.na(cr)) cr <- as.integer(Sys.getenv("OMP_NUM_THREADS")) + if (is.na(cr)) cr <- as.integer(Sys.getenv("OMP_THREAD_LIMIT")) + if (is.na(cr)) cr <- as.integer(Sys.getenv("NUMBER_OF_PROCESSORS")) + ## all.tests=TRUE is unsafe for batch use (addefd by ripley) + if (is.na(cr)) cr <- parallel::detectCores() # all.tests=TRUE) + if (is.na(cr) || cr < 1) { cr <- 1 } - 
options(lfe.threads=cr) + options(lfe.threads = cr) } } .onUnload <- function(libpath) { - options(lfe.usecg=NULL, lfe.eps=NULL,lfe.pint=NULL,lfe.accel=NULL, - lfe.bootmem=NULL,lfe.threads=NULL, lfe.etol=NULL,robust=FALSE) - library.dynam.unload('lfe',libpath) + options( + lfe.usecg = NULL, lfe.eps = NULL, lfe.pint = NULL, lfe.accel = NULL, + lfe.bootmem = NULL, lfe.threads = NULL, lfe.etol = NULL, robust = FALSE + ) + library.dynam.unload("lfe", libpath) } -if(!exists('anyNA')) anyNA <- function(x) any(is.na(x)) +if (!exists("anyNA")) anyNA <- function(x) any(is.na(x)) # Phase out cBind/rBind. Make sure we still work with older versions. # Used in utils.R, chmethod.R and in some Rd-files (and vignettes) # Make functions with (...) rather than just point to the right functions. # The latter will raise a warning in checks about calls to .Internal functions. RV <- R.Version() -rv <- paste(RV$major,RV$minor, sep='.') +rv <- paste(RV$major, RV$minor, sep = ".") mv <- sessionInfo()$otherPkgs$Matrix$Version -if(compareVersion('3.2-0',rv) > 0 || compareVersion('1.2-0',mv) > 0) { +if (compareVersion("3.2-0", rv) > 0 || compareVersion("1.2-0", mv) > 0) { ..cbind.. <- quote(Matrix::cBind) } else { ..cbind.. <- quote(cbind) @@ -62,8 +71,8 @@ mycbind <- local(function(...) { cl <- match.call() cl[[1L]] <- ..cbind.. eval.parent(cl) -}, list(..cbind..=..cbind..)) -rm(rv,RV,mv,..cbind..) +}, list(..cbind.. = ..cbind..)) +rm(rv, RV, mv, ..cbind..) ## numcores <- function() { diff --git a/R/trace.R b/R/trace.R index 94f08c5..688aae3 100644 --- a/R/trace.R +++ b/R/trace.R @@ -4,7 +4,7 @@ # multiplying the matrix with a matrix, we use an iterative method # from quantum physics, based on the general formula for the expectation # of a quadratic form E(x' M x) = tr(MV) + E(x)' M E(x) where V=Cov(x). 
It reduces -# to +# to # E(x' M x) = tr(M) # when x has zero expectation and identity covariance matrix # the most efficient (lowest variation) is to draw x uniformly from {-1,1} @@ -18,169 +18,174 @@ #' Compute trace of a large matrix by sample means -#' -#' +#' +#' #' Some matrices are too large to be represented as a matrix, even as a sparse #' matrix. Nevertheless, it can be possible to compute the matrix vector #' product fairly easy, and this is utilized to estimate the trace of the #' matrix. -#' -#' \code{mctrace} is used internally by \code{\link{fevcov}} and -#' \code{\link{bccorr}}, but has been made public since it might be useful for +#' +#' `mctrace` is used internally by [fevcov()] and +#' [bccorr()], but has been made public since it might be useful for #' other tasks as well. -#' +#' #' For any matrix \eqn{A}, the trace equals the sum of the diagonal elements, #' or the sum of the eigenvalues. However, if the size of the matrix is very #' large, we may not have a matrix representation, so the diagonal is not #' immediately available. In that case we can use the formula \eqn{tr(A) = #' E(x^t A x)}{tr(A) = E(x'Ax)} where \eqn{x} is a random vector with zero -#' expectation and \eqn{Var(x) = I}. We estimate the expecation with sample -#' means. \code{mctrace} draws \eqn{x} in \eqn{\{-1,1\}^N}{{-1,1}}, and -#' evaluates \code{mat} on these vectors. -#' -#' If \code{mat} is a function, it must be able to take a matrix of column +#' expectation and \eqn{Var(x) = I}. We estimate the expectation with sample +#' means. `mctrace` draws \eqn{x} in \eqn{\{-1,1\}^N}{{-1,1}}, and +#' evaluates `mat` on these vectors. +#' +#' If `mat` is a function, it must be able to take a matrix of column #' vectors as input. Since \eqn{x^t A x = (Ax,x)}{x'Ax = (Ax,x)} is evaluated, #' where \eqn{(\cdot,\cdot)}{(,)} is the Euclidean inner product, the function -#' \code{mat} can perform this inner product itself. 
In that case the function -#' should have an attribute \code{attr(mat,'IP') <- TRUE} to signal this. -#' -#' If \code{mat} is a list of factors, the matrix for which to estimate the +#' `mat` can perform this inner product itself. In that case the function +#' should have an attribute `attr(mat,'IP') <- TRUE` to signal this. +#' +#' If `mat` is a list of factors, the matrix for which to estimate the #' trace, is the projection matrix which projects out the factors. I.e. how #' many dimensions are left when the factors have been projected out. Thus, it #' is possible to estimate the degrees of freedom in an OLS where factors are #' projected out. -#' -#' The tolerance \code{tol} is a relative tolerance. The iteration terminates +#' +#' The tolerance `tol` is a relative tolerance. The iteration terminates #' when the normalized standard deviation of the sample mean (s.d. divided by -#' absolute value of the current sample mean) goes below \code{tol}. Specify a -#' negative \code{tol} to use the absolute standard deviation. The tolerance +#' absolute value of the current sample mean) goes below `tol`. Specify a +#' negative `tol` to use the absolute standard deviation. The tolerance #' can also change during the iterations; you can specify #' \code{tol=function(curest) {...}} and return a tolerance based on the #' current estimate of the trace (i.e. the current sample mean). -#' +#' #' @param mat square matrix, Matrix, function or list of factors. -#' @param N integer. if \code{mat} is a function, the size of the matrix is +#' @param N integer. if `mat` is a function, the size of the matrix is #' specified here. #' @param tol numeric. Tolerance. #' @param maxsamples numeric. Maximum number of samples in the expectation #' estimation. #' @param trname character. Arbitrary name used in progress reports. #' @param init numeric. Initial guess for the trace. 
-#' @return An estimate of the trace of the matrix represented by \code{mat} is +#' @return An estimate of the trace of the matrix represented by `mat` is #' returned. #' @examples -#' -#' A <- matrix(rnorm(25),5) -#' fun <- function(x) A %*% x -#' sum(diag(A)) -#' sum(eigen(A,only.values=TRUE)$values) -#' # mctrace is not really useful for small problems. -#' mctrace(fun,ncol(A),tol=0.05) -#' # try a larger problem (3000x3000): -#' f1 <- factor(sample(1500,3000,replace=TRUE)) -#' f2 <- factor(sample(1500,3000,replace=TRUE)) -#' fl <- list(f1,f2) -#' mctrace(fl,tol=-5) -#' # exact: -#' length(f1) - nlevels(f1) - nlevels(f2) + nlevels(compfactor(fl)) -#' +#' +#' A <- matrix(rnorm(25), 5) +#' fun <- function(x) A %*% x +#' sum(diag(A)) +#' sum(eigen(A, only.values = TRUE)$values) +#' # mctrace is not really useful for small problems. +#' mctrace(fun, ncol(A), tol = 0.05) +#' # try a larger problem (3000x3000): +#' f1 <- factor(sample(1500, 3000, replace = TRUE)) +#' f2 <- factor(sample(1500, 3000, replace = TRUE)) +#' fl <- list(f1, f2) +#' mctrace(fl, tol = -5) +#' # exact: +#' length(f1) - nlevels(f1) - nlevels(f2) + nlevels(compfactor(fl)) +#' #' @export mctrace -mctrace <- function(mat, N, tol=1e-3, maxsamples=Inf, - trname='', init) { - if(is.matrix(mat) || inherits(mat,'Matrix')) { - return(structure(sum(diag(mat)), sd=0, iterations=0)) - } else if(is.list(mat) && all(sapply(mat, is.factor))) { +mctrace <- function(mat, N, tol = 1e-3, maxsamples = Inf, + trname = "", init) { + if (is.matrix(mat) || inherits(mat, "Matrix")) { + return(structure(sum(diag(mat)), sd = 0, iterations = 0)) + } else if (is.list(mat) && all(sapply(mat, is.factor))) { N <- length(mat[[1]]) - fun <- function(v,trtol) colSums(demeanlist(v, mat)*v) - } else if(!is.function(mat)) { - stop('mat must be function, factor list or matrix') + fun <- function(v, trtol) colSums(demeanlist(v, mat) * v) + } else if (!is.function(mat)) { + stop("mat must be function, factor list or matrix") } else { - 
if(missing(N)) stop('N (vector length) must be specified with mat a function') - if(isTRUE(attr(mat,'IP'))) { + if (missing(N)) stop("N (vector length) must be specified with mat a function") + if (isTRUE(attr(mat, "IP"))) { # inner product is done by the function itself. fun <- mat } else { - fun <- function(v,trtol) colSums(mat(v)*v) + fun <- function(v, trtol) colSums(mat(v) * v) } } - if(!is.function(tol)) eps <- function(x) tol else eps <- tol - - threads <- getOption('lfe.threads') - if(maxsamples < threads) threads <- maxsamples + if (!is.function(tol)) eps <- function(x) tol else eps <- tol + + threads <- getOption("lfe.threads") + if (maxsamples < threads) threads <- maxsamples maxB <- getOption("lfe.bootmem") * 1e+06 - maxvpt <- maxB %/% (2*8*N*threads) - if(maxvpt*threads > 4096) maxvpt <- 4096 %/% threads -# if(maxvpt == 0) { -# maxvpt <- 1 -# } + maxvpt <- maxB %/% (2 * 8 * N * threads) + if (maxvpt * threads > 4096) maxvpt <- 4096 %/% threads + # if(maxvpt == 0) { + # maxvpt <- 1 + # } # ensure at least 8 vectors in first iteration - if(threads >= 8) - vpt <- 1 - else - vpt <- 16 %/% (threads+1) + if (threads >= 8) { + vpt <- 1 + } else { + vpt <- 16 %/% (threads + 1) + } + - - blk <- vpt*threads - if(blk > maxsamples) blk <- maxsamples + blk <- vpt * threads + if (blk > maxsamples) blk <- maxsamples i <- 0 tr <- 0 sqsum <- 0 NN <- 0 last <- start <- Sys.time() # get a clue about the tolerance. 
-# cureps <- eps(as.numeric(fun(0, trtol=0)))/2 - if(missing(init)) init <- N - cureps <- eps(init)/2 - pint <- getOption('lfe.pint') + # cureps <- eps(as.numeric(fun(0, trtol=0)))/2 + if (missing(init)) init <- N + cureps <- eps(init) / 2 + pint <- getOption("lfe.pint") - while(NN < maxsamples && (NN < 8 || - (cureps > 0 && relsd > cureps) || - (cureps < 0 && sd > -cureps))) { - i <- i+1 + while (NN < maxsamples && (NN < 8 || + (cureps > 0 && relsd > cureps) || + (cureps < 0 && sd > -cureps))) { + i <- i + 1 now <- Sys.time() - if(NN > 0) { - remaining <- as.integer((Ntarget-NN)/(NN/as.numeric(now-start, units='secs'))) - if(remaining > pint && as.numeric(now - last, units='secs') > pint) { - message(' *** trace ',trname,' sample ',NN,' of ',Ntarget, - ',mean ',signif(tr/NN,3), ', sd ',signif(sd,3),', target ', signif(cureps,3), - ', expected finish at ', - now + remaining) + if (NN > 0) { + remaining <- as.integer((Ntarget - NN) / (NN / as.numeric(now - start, units = "secs"))) + if (remaining > pint && as.numeric(now - last, units = "secs") > pint) { + message( + " *** trace ", trname, " sample ", NN, " of ", Ntarget, + ",mean ", signif(tr / NN, 3), ", sd ", signif(sd, 3), ", target ", signif(cureps, 3), + ", expected finish at ", + now + remaining + ) last <- now } } - - ests <- fun(matrix(sample(c(-1,1), N*blk, replace=TRUE), N), trtol=abs(cureps)) + + ests <- fun(matrix(sample(c(-1, 1), N * blk, replace = TRUE), N), trtol = abs(cureps)) gc() -# cat('ests : ', mean(ests), ' ', sd(ests)); print(fivenum(ests)) + # cat('ests : ', mean(ests), ' ', sd(ests)); print(fivenum(ests)) NN <- NN + blk tr <- tr + sum(ests) sqsum <- sqsum + sum(ests^2) rm(ests) # compute sd for the mean tr/NN. 
It's sqrt(E(x^2) - E(X)^2)/sqrt(NN) - if(NN > 1) - sd <- sqrt(sqsum/(NN-1) - tr^2/((NN*(NN-1))))/sqrt(NN) - else - sd <- 0 + if (NN > 1) { + sd <- sqrt(sqsum / (NN - 1) - tr^2 / ((NN * (NN - 1)))) / sqrt(NN) + } else { + sd <- 0 + } -# message(trname,' sd=',sd,' relsd=',relsd,' NN=',NN, ' cureps=',cureps) - cureps <- eps(tr/NN) + # message(trname,' sd=',sd,' relsd=',relsd,' NN=',NN, ' cureps=',cureps) + cureps <- eps(tr / NN) # try to figure out how many iterations are needed to obtain the # desired tolerance. - sdtarget <- if(cureps < 0) -cureps else cureps*abs(tr/NN) - if(NN == 1) sd <- sqrt(2)*sdtarget - relsd <- sd/abs(tr/NN) - Ntarget <- as.integer((sd/sdtarget)^2*NN) - if(is.na(Ntarget)) stop('Too much variance in trace samples for ',trname, ', sd ',sd,', cureps ',cureps) - vpt <- 1 + (Ntarget-NN) %/% threads - if(vpt > maxvpt) vpt <- maxvpt - if(vpt < 1) vpt <- 1 - blk <- vpt*threads + sdtarget <- if (cureps < 0) -cureps else cureps * abs(tr / NN) + if (NN == 1) sd <- sqrt(2) * sdtarget + relsd <- sd / abs(tr / NN) + Ntarget <- as.integer((sd / sdtarget)^2 * NN) + if (is.na(Ntarget)) stop("Too much variance in trace samples for ", trname, ", sd ", sd, ", cureps ", cureps) + vpt <- 1 + (Ntarget - NN) %/% threads + if (vpt > maxvpt) vpt <- maxvpt + if (vpt < 1) vpt <- 1 + blk <- vpt * threads + } + # if(last > start) cat('\n') + dt <- as.numeric(Sys.time() - start, units = "secs") + if (dt > pint) { + message(" *** trace ", trname, " ", NN, " samples finished in ", as.integer(dt), " seconds") } -# if(last > start) cat('\n') - dt <- as.numeric(Sys.time()-start, units='secs') - if(dt > pint) - message(' *** trace ',trname,' ',NN, ' samples finished in ', as.integer(dt), ' seconds') - structure(tr/NN, sd=sd, iterations=NN) + structure(tr / NN, sd = sd, iterations = NN) } diff --git a/R/utils.R b/R/utils.R index 80e7d04..d0be39f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -6,17 +6,17 @@ # it should only be used on objects with a single reference # Our use of it is 
safe, and we don't export it. setdimnames <- function(obj, nm) { - .Call(C_setdimnames,obj,nm) + .Call(C_setdimnames, obj, nm) } scalecols <- function(obj, vec) { .Call(C_scalecols, obj, vec) } -Crowsum <- function(x,f,mean=FALSE) .Call(C_rowsum,x,f,mean) +crowsum <- function(x, f, mean = FALSE) .Call(C_rowsum, x, f, mean) orthonormalize <- function(V) { - structure(V %*% solve(chol(crossprod(V))), ortho=TRUE) + structure(V %*% solve(chol(crossprod(V))), ortho = TRUE) } # This function sets an attribute 'inplace' on the argument @@ -25,117 +25,127 @@ orthonormalize <- function(V) { # up seriously if used in the wrong manner, so it's not public # except as in an argument to demeanlist (through eval-trickery). unnamed <- function(x) { - .Call(C_inplace,x) + .Call(C_inplace, x) } -wmean <- function(x,w) sum(w*x)/sum(w) -wcov <- function(x,y,w) { - sw <- w/sum(w) - mx <- sum(sw*x) - my <- sum(sw*y) +wmean <- function(x, w) sum(w * x) / sum(w) +wcov <- function(x, y, w) { + sw <- w / sum(w) + mx <- sum(sw * x) + my <- sum(sw * y) cx <- x - mx cy <- y - my - sum(sw*cx*cy)/(1-sum(sw^2)) + sum(sw * cx * cy) / (1 - sum(sw^2)) } -wvar <- function(x,w) wcov(x,x,w) +wvar <- function(x, w) wcov(x, x, w) pinvx <- function(X) { -# return(pinv(nazero(X))) - ch <- cholx(nazero(X)) - badvars <- attr(ch,'badvars') - inv1 <- chol2inv(ch) - if(is.null(badvars)) return(inv1) - inv <- matrix(0,nrow(X),ncol(X)) - inv[-badvars,-badvars] <- inv1 - structure(inv,badvars=attr(ch,'badvars')) + # return(pinv(nazero(X))) + ch <- cholx(nazero(X)) + badvars <- attr(ch, "badvars") + inv1 <- chol2inv(ch) + if (is.null(badvars)) { + return(inv1) + } + inv <- matrix(0, nrow(X), ncol(X)) + inv[-badvars, -badvars] <- inv1 + structure(inv, badvars = attr(ch, "badvars")) } # Do a Cholesky to detect multi-collinearities -cholx <- function(mat, eps=1e-6) { - if(is.null(dim(mat))) dim(mat) <- c(1,1) +cholx <- function(mat, eps = 1e-6) { + if (is.null(dim(mat))) dim(mat) <- c(1, 1) N <- dim(mat)[1] - if(N == 
1) { - return(structure(sqrt(mat),badvars=if(mat<=0) 1 else NULL)) + if (N == 1) { + return(structure(sqrt(mat), badvars = if (mat <= 0) 1 else NULL)) } # first, try a pivoted one - tol <- N*getOption('lfe.eps') - chp <- chol(mat,pivot=TRUE,tol=tol) - rank <- attr(chp,'rank') - if(rank == N) return(chol(mat)) - pivot <- attr(chp,'pivot') + tol <- N * getOption("lfe.eps") + chp <- chol(mat, pivot = TRUE, tol = tol) + rank <- attr(chp, "rank") + if (rank == N) { + return(chol(mat)) + } + pivot <- attr(chp, "pivot") oo <- order(pivot) - badvars <- pivot[((rank+1):N)] + badvars <- pivot[((rank + 1):N)] ok <- (1:N)[-badvars] - ch <- chol(mat[ok,ok]) - return(structure(ch,badvars=badvars)) + ch <- chol(mat[ok, ok]) + return(structure(ch, badvars = badvars)) } -cholsolve <- function(A,b) { +cholsolve <- function(A, b) { ch <- chol(A) - backsolve(ch,backsolve(ch,b,transpose=TRUE)) + backsolve(ch, backsolve(ch, b, transpose = TRUE)) } -pinv <- function (X, tol = sqrt(.Machine$double.eps)) { - if (length(dim(X)) > 2L || !(is.numeric(X) || is.complex(X))) +pinv <- function(X, tol = sqrt(.Machine$double.eps)) { + if (length(dim(X)) > 2L || !(is.numeric(X) || is.complex(X))) { stop("'X' must be a numeric or complex matrix") - if (!is.matrix(X)) + } + if (!is.matrix(X)) { X <- as.matrix(X) + } Xsvd <- svd(X) badvars <- integer(0) Positive <- Xsvd$d > max(tol * Xsvd$d[1L], 0) - Positive <- ifelse(is.na(Positive),FALSE,Positive) + Positive <- ifelse(is.na(Positive), FALSE, Positive) badvars <- which(!Positive) - if (all(Positive)) - res <- Xsvd$v %*% (1/Xsvd$d * t(Xsvd$u)) - else if (!any(Positive)) + if (all(Positive)) { + res <- Xsvd$v %*% (1 / Xsvd$d * t(Xsvd$u)) + } else if (!any(Positive)) { res <- array(0, dim(X)[2L:1L]) - else - res <- Xsvd$v[, Positive, drop = FALSE] %*% ((1/Xsvd$d[Positive]) * - t(Xsvd$u[, Positive, drop = FALSE])) - structure(res,badvars=badvars) + } else { + res <- Xsvd$v[, Positive, drop = FALSE] %*% ((1 / Xsvd$d[Positive]) * + t(Xsvd$u[, Positive, 
drop = FALSE])) + } + structure(res, badvars = badvars) } -nazero <- function(x) ifelse(is.na(x),0,x) +nazero <- function(x) ifelse(is.na(x), 0, x) -ftest <- function(z, zr=NULL, vcov=z$vcv) { - rdf <- z$df #z$N - z$p - 1 - if(is.null(zr)) { -# we should do F-test vs. model with intercept. -# but the intercept in z is implicit. - F <- (t(coef(z)) %*% pinvx(vcov) %*% coef(z))/z$p - return( c(F=F, p=pf(F, z$p, rdf, lower.tail=FALSE), df1=z$p, df2=rdf)) +ftest <- function(z, zr = NULL, vcov = z$vcv) { + rdf <- z$df # z$N - z$p - 1 + if (is.null(zr)) { + # we should do F-test vs. model with intercept. + # but the intercept in z is implicit. + F <- (t(coef(z)) %*% pinvx(vcov) %*% coef(z)) / z$p + return(c(F = F, p = pf(F, z$p, rdf, lower.tail = FALSE), df1 = z$p, df2 = rdf)) } -# df1 <- length(coef(z)) - length(coef(zr)) + # df1 <- length(coef(z)) - length(coef(zr)) df1 <- z$p - zr$p c1 <- coef(z) - c2 <- rep(0,length(c1)) + c2 <- rep(0, length(c1)) names(c2) <- names(c1) c2[names(coef(zr))] <- coef(zr) - F <- (t(c1-c2) %*% pinvx(vcov) %*% (c1-c2))/df1 - return(c(F=F, p=pf(F,df1, rdf, lower.tail=FALSE), df1=df1,df2=rdf)) + F <- (t(c1 - c2) %*% pinvx(vcov) %*% (c1 - c2)) / df1 + return(c(F = F, p = pf(F, df1, rdf, lower.tail = FALSE), df1 = df1, df2 = rdf)) } -mkgraph <- function(f1,f2) { - if(!requireNamespace('igraph', quietly=TRUE)) stop('Package igraph not found') -# graph.edgelist(cbind(paste('f1',f1),paste('f2',f2)), directed=FALSE) -# graph.edgelist(cbind(500000000+as.integer(f1),f2), directed=FALSE) - igraph::graph.adjacency(tcrossprod(makeDmatrix(list(f1,f2)))>0, - 'undirected', diag=FALSE) +mkgraph <- function(f1, f2) { + if (!requireNamespace("igraph", quietly = TRUE)) stop("Package igraph not found") + # graph.edgelist(cbind(paste('f1',f1),paste('f2',f2)), directed=FALSE) + # graph.edgelist(cbind(500000000+as.integer(f1),f2), directed=FALSE) + igraph::graph.adjacency(tcrossprod(makeDmatrix(list(f1, f2))) > 0, + "undirected", + diag = FALSE + ) } -diamgraph <- 
function(flist,approx=TRUE) { - if(!requireNamespace('igraph', quietly=TRUE)) stop('Package igraph not found') - gr <- mkgraph(flist[[1]],flist[[2]]) -# find largest cluster +diamgraph <- function(flist, approx = TRUE) { + if (!requireNamespace("igraph", quietly = TRUE)) stop("Package igraph not found") + gr <- mkgraph(flist[[1]], flist[[2]]) + # find largest cluster cl <- igraph::clusters(gr)$membership lcl <- which(cl == which.max(table(cl))) - if(approx) - max(igraph::shortest.paths(gr,v=sample(lcl,10),to=sample(lcl,10))) - else - igraph::diameter(igraph::induced.subgraph(gr,lcl)) + if (approx) { + max(igraph::shortest.paths(gr, v = sample(lcl, 10), to = sample(lcl, 10))) + } else { + igraph::diameter(igraph::induced.subgraph(gr, lcl)) + } } @@ -145,45 +155,47 @@ diamgraph <- function(flist,approx=TRUE) { #' Find diameters of mobility graphs -#' +#' #' 'diammatrix' computes the diameters of certain graphs related to convergence -#' speed of \code{felm}. -#' -#' Each pair of factors (f1,f2) from \code{flist} defines a bipartite graph in +#' speed of `felm`. +#' +#' Each pair of factors (f1,f2) from `flist` defines a bipartite graph in #' which the vertices are the levels of the factors, and two vertices are #' adjacent if they are observed simultaneously. The connected components of #' this graph are important for identification of the coefficients for the -#' factor levels, i.e. for \code{getfe}. But experience and some trials have +#' factor levels, i.e. for `getfe`. But experience and some trials have #' led the author to speculate that the diameter of the graph (or its largest #' component) is also important for the convergence rate. Specifically, the #' author suspects that under some assumptions, time to convergence goes like #' the square of the diameter. At least in the case of two factors. This #' function computes the diameter for each pair of factors. If the graph is -#' disconnected, the largest connected component is used. 
If \code{accel=TRUE} +#' disconnected, the largest connected component is used. If `accel=TRUE` #' (the default), the diameter is approximated from below by drawing two sets #' of 10 random vertices and finding the maximum length of the shortest paths #' between them. -#' +#' #' @param flist a list of factors defining the dummies. #' @param approx logical. Approximate diameters are computed. -#' @return A matrix of dimension K x K where K is \code{length(flist)}. +#' @return A matrix of dimension K x K where K is `length(flist)`. #' @note This function is not important to the operation of the package, it is #' included for easy experimentation with the convergence rate. It requires #' that the suggested package \pkg{igraph} is attached. #' @keywords internal -diammatrix <- function(flist, approx=TRUE) { - +diammatrix <- function(flist, approx = TRUE) { flen <- length(flist) - if(flen < 2) return(0) - val <- matrix(0,flen,flen) + if (flen < 2) { + return(0) + } + val <- matrix(0, flen, flen) colnames(val) <- names(flist) rownames(val) <- names(flist) - for(if1 in 1:(flen-1)) - for(if2 in (if1+1):flen) { - val[if1,if2] <- diamgraph(flist[c(if1,if2)], approx) + for (if1 in 1:(flen - 1)) { + for (if2 in (if1 + 1):flen) { + val[if1, if2] <- diamgraph(flist[c(if1, if2)], approx) } + } -# fill in lower triangle: + # fill in lower triangle: val[row(val) > col(val)] <- t(val)[row(val) > col(val)] val } @@ -191,36 +203,39 @@ diammatrix <- function(flist, approx=TRUE) { # if(!exists('fixef')) fixef <- function(object,...) 
UseMethod('fixef') # compute rank deficiency of D-matrix -rankDefic <- function(fl,method='cholesky',mctol=1e-3) { +rankDefic <- function(fl, method = "cholesky", mctol = 1e-3) { eps <- sqrt(.Machine$double.eps) - if(length(fl) == 1) return(1) - if(length(fl) == 2) return(nlevels(compfactor(fl))) - if(method == 'cholesky') { + if (length(fl) == 1) { + return(1) + } + if (length(fl) == 2) { + return(nlevels(compfactor(fl))) + } + if (method == "cholesky") { D <- makeDmatrix(fl) - Ch <- as(Cholesky(crossprod(D), super=TRUE, perm=TRUE, Imult=eps), 'sparseMatrix') - sum(diag(Ch) < eps^(1/4)) - } else if(method == 'mc') { - totlev <- sum(sapply(fl,nlevels)) + Ch <- as(Cholesky(crossprod(D), super = TRUE, perm = TRUE, Imult = eps), "sparseMatrix") + sum(diag(Ch) < eps^(1 / 4)) + } else if (method == "mc") { + totlev <- sum(sapply(fl, nlevels)) N <- length(fl[[1]]) len <- length(fl) + 1 # now, we will use the rank deficiency d to compute # degrees of freedom corrections. I.e. (tr+totlev)+len should # be within a certain relative tolerance, which translates # to an absolute tolerance of tr - tolfun <- function(tr) -mctol*abs((tr+totlev)+len) + tolfun <- function(tr) -mctol * abs((tr + totlev) + len) init <- 0 - N - (mctrace(fl,tol=tolfun,init=init) + totlev)+len - + N - (mctrace(fl, tol = tolfun, init = init) + totlev) + len } else { D <- makeDmatrix(fl) - as.integer(ncol(D) - rankMatrix(crossprod(D), method='qr.R')) + as.integer(ncol(D) - rankMatrix(crossprod(D), method = "qr.R")) } } # in makeDmatrix/makePmatrix, the weights argument should be the # square root of the weights. This is what is stored in internal structures. -# +# @@ -228,173 +243,210 @@ rankDefic <- function(fl,method='cholesky',mctol=1e-3) { #' Make sparse matrix of dummies from factor list -#' +#' #' Given a list of factors, return the matrix of dummies as a sparse matrix. -#' +#' #' The function returns the model matrix for a list of factors. 
This matrix is #' not used internally by the package, but it's used in some of the #' documentation for illustrative purposes. -#' +#' #' @param fl list of factors. #' @param weights numeric vector. Multiplied into the rows. #' @return Returns a sparse matrix. #' @examples -#' -#' fl <- lapply(1:3, function(i) factor(sample(3,10,replace=TRUE))) -#' fl -#' makeDmatrix(fl, weights=seq(0.1,1,0.1)) -#' +#' +#' fl <- lapply(1:3, function(i) factor(sample(3, 10, replace = TRUE))) +#' fl +#' makeDmatrix(fl, weights = seq(0.1, 1, 0.1)) +#' #' @export makeDmatrix -makeDmatrix <- function(fl, weights=NULL) { +makeDmatrix <- function(fl, weights = NULL) { # make the D matrix # for pure factors f, it's just the t(as(f,'sparseMatrix')) # if there's a covariate vector x, it's t(as(f,'sparseMatrix'))*x # for a covariate matrix x, it's the cbinds of the columns with the factor-matrix - ans <- do.call(mycbind,lapply(fl, function(f) { - x <- attr(f,'x',exact=TRUE) - fm <- t(as(f,'sparseMatrix')) - if(is.null(x)) return(fm) - if(!is.matrix(x)) return(fm*x) - do.call(mycbind,apply(x,2,'*',fm)) + ans <- do.call(mycbind, lapply(fl, function(f) { + x <- attr(f, "x", exact = TRUE) + fm <- t(as(f, "sparseMatrix")) + if (is.null(x)) { + return(fm) + } + if (!is.matrix(x)) { + return(fm * x) + } + do.call(mycbind, apply(x, 2, "*", fm)) })) nm <- names(fl) - if(is.null(nm)) nm <- paste('f',seq_along(fl),sep='') - levnm <- unlist(sapply(seq_along(fl), function(i) xlevels(nm[i],fl[[i]]))) - if(!is.null(weights)) - ans <- Diagonal(length(weights),weights) %*% ans + if (is.null(nm)) nm <- paste("f", seq_along(fl), sep = "") + levnm <- unlist(sapply(seq_along(fl), function(i) xlevels(nm[i], fl[[i]]))) + if (!is.null(weights)) { + ans <- Diagonal(length(weights), weights) %*% ans + } colnames(ans) <- levnm ans } -makePmatrix <- function(fl, weights=NULL) { - D <- makeDmatrix(fl,weights) +makePmatrix <- function(fl, weights = NULL) { + D <- makeDmatrix(fl, weights) DtD <- crossprod(D) DtDi <- 
pinvx(as.matrix(DtD)) - badvars <- attr(DtDi,'badvars') - if(is.null(badvars)) return(D %*% DtDi %*% t(D)) - D <- D[,-badvars,drop=FALSE] + badvars <- attr(DtDi, "badvars") + if (is.null(badvars)) { + return(D %*% DtDi %*% t(D)) + } + D <- D[, -badvars, drop = FALSE] return(D %*% pinvx(as.matrix(crossprod(D))) %*% t(D)) } # total number of variables projected out totalpvar <- function(fl) { - if(length(fl) == 0) return(0) + if (length(fl) == 0) { + return(0) + } sum(sapply(fl, function(f) { - x <- attr(f,'x',exact=TRUE) - if(is.null(x) || !is.matrix(x)) return(nlevels(f)) - return(ncol(x)*nlevels(f)) + x <- attr(f, "x", exact = TRUE) + if (is.null(x) || !is.matrix(x)) { + return(nlevels(f)) + } + return(ncol(x) * nlevels(f)) })) } -nrefs <- function(fl, cf, exactDOF=FALSE) { - if(length(fl) == 0) return(0) - if(missing(cf)) cf <- compfactor(fl) - numpure <- sum(sapply(fl,function(f) is.null(attr(f,'x',exact=TRUE)))) - if(numpure == 1) return(0) - if(numpure == 2) return(nlevels(cf)) - if(identical(exactDOF,'rM')) { - return(rankDefic(fl, method='qr')) - } else if(identical(exactDOF,'mc')) { - return(rankDefic(fl, method='mc')) - } else if(exactDOF) { - return(rankDefic(fl, method='cholesky')) - } - return(nlevels(cf) + numpure-2) +nrefs <- function(fl, cf, exactDOF = FALSE) { + if (length(fl) == 0) { + return(0) + } + if (missing(cf)) cf <- compfactor(fl) + numpure <- sum(sapply(fl, function(f) is.null(attr(f, "x", exact = TRUE)))) + if (numpure == 1) { + return(0) + } + if (numpure == 2) { + return(nlevels(cf)) + } + if (identical(exactDOF, "rM")) { + return(rankDefic(fl, method = "qr")) + } else if (identical(exactDOF, "mc")) { + return(rankDefic(fl, method = "mc")) + } else if (exactDOF) { + return(rankDefic(fl, method = "cholesky")) + } + return(nlevels(cf) + numpure - 2) } -wildcard <- function(formula, s=ls(environment(formula)), re=FALSE, - nomatch.failure=TRUE) { +wildcard <- function(formula, s = ls(environment(formula)), re = FALSE, + nomatch.failure = 
TRUE) { env <- environment(formula) F <- as.Formula(formula) lenlhs <- length(F)[1] lenrhs <- length(F)[2] - if(lenlhs == 1 && lenrhs == 1) return(swildcard(formula,s,re, nomatch.failure=nomatch.failure)); + if (lenlhs == 1 && lenrhs == 1) { + return(swildcard(formula, s, re, nomatch.failure = nomatch.failure)) + } # do the parts separately lhs <- lapply(seq_len(lenlhs), function(lh) { - swildcard(formula(F, lhs=lh,rhs=0), s, re, nomatch.failure=nomatch.failure)[[2]] + swildcard(formula(F, lhs = lh, rhs = 0), s, re, nomatch.failure = nomatch.failure)[[2]] }) rhs <- lapply(seq_len(lenrhs), function(rh) { - swildcard(formula(F, lhs=0,rhs=rh), s, re, nomatch.failure=nomatch.failure)[[2]] + swildcard(formula(F, lhs = 0, rhs = rh), s, re, nomatch.failure = nomatch.failure)[[2]] }) - if(length(lhs) > 0) { + if (length(lhs) > 0) { LHS <- lhs[[1]] - for(s in lhs[-1]) { + for (s in lhs[-1]) { LHS <- bquote(.(LHS) | .(s)) } - } else LHS <- NULL + } else { + LHS <- NULL + } RHS <- rhs[[1]] - for(s in rhs[-1]) { + for (s in rhs[-1]) { RHS <- bquote(.(RHS) | .(s)) } - if(is.null(LHS)) return(as.Formula(substitute( ~ R, list(R=RHS)), env=env)) - else - as.Formula(substitute(L ~ R, list(L=LHS, R=RHS)), env=env) + if (is.null(LHS)) { + return(as.Formula(substitute(~R, list(R = RHS)), env = env)) + } else { + as.Formula(substitute(L ~ R, list(L = LHS, R = RHS)), env = env) + } } -swildcard <- function(formula, s=ls(environment(formula)), re=FALSE, - nomatch.failure=TRUE) { +swildcard <- function(formula, s = ls(environment(formula)), re = FALSE, + nomatch.failure = TRUE) { av <- all.vars(formula) - if(!re) { - rchars <- c('*','?') - wild <- unique(unlist(sapply(rchars,grep, av,fixed=TRUE,value=TRUE))) - if(length(wild) > 0) - rewild <- utils::glob2rx(wild,trim.tail=FALSE) - else - rewild <- NULL + if (!re) { + rchars <- c("*", "?") + wild <- unique(unlist(sapply(rchars, grep, av, fixed = TRUE, value = TRUE))) + if (length(wild) > 0) { + rewild <- utils::glob2rx(wild, trim.tail = 
FALSE) + } else { + rewild <- NULL + } # quote some regexp-specials - rewild <- lapply(seq_along(rewild), function(i) structure(rewild[i],orig=wild[i])) + rewild <- lapply(seq_along(rewild), function(i) structure(rewild[i], orig = wild[i])) } else { - rchars <- c('.','*','|','\\','?','[',']','(',')','{','}') - wild <- unique(unlist(sapply(rchars, grep, av, fixed=TRUE, value=TRUE))) - rewild <- lapply(wild, function(w) structure(w,orig=w)) + rchars <- c(".", "*", "|", "\\", "?", "[", "]", "(", ")", "{", "}") + wild <- unique(unlist(sapply(rchars, grep, av, fixed = TRUE, value = TRUE))) + rewild <- lapply(wild, function(w) structure(w, orig = w)) } wsub <- lapply(rewild, function(w) { - mtch <- grep(paste('^',w,'$',sep=''), s, value=TRUE, perl=TRUE) - if(length(mtch) == 0) { - if(nomatch.failure) stop("Couldn't match wildcard variable `", - attr(w,'orig'),'`', call.=FALSE) - mtch <- attr(w,'orig') + mtch <- grep(paste("^", w, "$", sep = ""), s, value = TRUE, perl = TRUE) + if (length(mtch) == 0) { + if (nomatch.failure) { + stop("Couldn't match wildcard variable `", + attr(w, "orig"), "`", + call. 
= FALSE + ) + } + mtch <- attr(w, "orig") } - as.list(parse(text=paste('`',mtch,'`',collapse='+',sep='')))[[1]] + as.list(parse(text = paste("`", mtch, "`", collapse = "+", sep = "")))[[1]] }) names(wsub) <- wild - formula(as.Formula(do.call(substitute, list(formula,wsub)), - env=environment(formula)), - update=T, collapse=TRUE, drop=FALSE) + formula( + as.Formula(do.call(substitute, list(formula, wsub)), + env = environment(formula) + ), + update = T, collapse = TRUE, drop = FALSE + ) } -#prettyprint a list of integers +# prettyprint a list of integers ilpretty <- function(il) { - paste(tapply(il,cumsum(c(1,diff(il)) != 1), - function(x) if(length(x) == 1) { - as.character(x) - } else { - paste(x[[1]],x[[length(x)]],sep=':') - }), collapse=' ') + paste(tapply( + il, cumsum(c(1, diff(il)) != 1), + function(x) { + if (length(x) == 1) { + as.character(x) + } else { + paste(x[[1]], x[[length(x)]], sep = ":") + } + } + ), collapse = " ") } -makefitnames <- function(s) gsub('`(Intercept)(fit)`','(Intercept)',paste('`',s,'(fit)`',sep=''), fixed=TRUE) +makefitnames <- function(s) gsub("`(Intercept)(fit)`", "(Intercept)", paste("`", s, "(fit)`", sep = ""), fixed = TRUE) delete.icpt <- function(x) { - asgn <- attr(x,'assign') + asgn <- attr(x, "assign") icpt <- asgn == 0 - if(!length(icpt)) return(x) + if (!length(icpt)) { + return(x) + } asgn <- asgn[!icpt] - ctr <- attr(x,'contrasts') - x <- x[,!icpt,drop=FALSE] - attr(x,'assign') <- asgn - attr(x,'contrasts') <- ctr + ctr <- attr(x, "contrasts") + x <- x[, !icpt, drop = FALSE] + attr(x, "assign") <- asgn + attr(x, "contrasts") <- ctr x } # do an ls on env, and the parent and all the way up to top -rls <- function(env, top=.GlobalEnv) { +rls <- function(env, top = .GlobalEnv) { ret <- character() e <- env repeat { - if(identical(e,emptyenv())) break; - ret <- c(ret,ls(e)) - if(identical(e,top)) break; + if (identical(e, emptyenv())) break + ret <- c(ret, ls(e)) + if (identical(e, top)) break e <- parent.env(e) } 
unique(ret) @@ -408,135 +460,152 @@ limlk <- function(mm) { # and M_i is projection out of both instruments and exogenous # variables ye <- cbind(mm$y, mm$ivy) - H1 <- crossprod(ye, newols(list(y=ye,x=mm$x),nostats=TRUE)$residuals) - H <- crossprod(ye, newols(list(y=ye,x=cbind(mm$ivx,mm$x)), nostats=TRUE)$residuals) - mat <- solve(H,H1) - min(eigen(mat, only.values=TRUE)$values) + H1 <- crossprod(ye, newols(list(y = ye, x = mm$x), nostats = TRUE)$residuals) + H <- crossprod(ye, newols(list(y = ye, x = cbind(mm$ivx, mm$x)), nostats = TRUE)$residuals) + mat <- solve(H, H1) + min(eigen(mat, only.values = TRUE)$values) } -resample <- function(cluster, return.factor=FALSE, na.action=NULL, fill=FALSE) { - if(is.list(cluster)) { - if(is.factor(cluster[[1]])) { - if(length(cluster) > 1) warning('List of factors not supported in resample, using first only (',names(cluster)[1],')') +resample <- function(cluster, return.factor = FALSE, na.action = NULL, fill = FALSE) { + if (is.list(cluster)) { + if (is.factor(cluster[[1]])) { + if (length(cluster) > 1) warning("List of factors not supported in resample, using first only (", names(cluster)[1], ")") cluster <- cluster[[1]] } } # resample entire levels of a factor - if(length(na.action) > 0) cluster <- factor(cluster[-na.action]) + if (length(na.action) > 0) cluster <- factor(cluster[-na.action]) iclu <- as.integer(cluster) - cl <- sort(sample(nlevels(cluster), replace=TRUE)) - if(fill) { + cl <- sort(sample(nlevels(cluster), replace = TRUE)) + if (fill) { tb <- table(cluster) - while(sum(tb[cl]) < length(cluster)) { - cl <- c(cl,sample(nlevels(cluster),1)) + while (sum(tb[cl]) < length(cluster)) { + cl <- c(cl, sample(nlevels(cluster), 1)) } cl <- sort(cl) } # find a faster way to do this: # s <- sort(unlist(sapply(cl, function(ll) which(clu==ll)))) s <- NULL - while(length(cl) > 0) { - s <- c(s,which(iclu %in% cl)) - cl <- cl[c(1L,diff(cl)) == 0] + while (length(cl) > 0) { + s <- c(s, which(iclu %in% cl)) + cl <- cl[c(1L, 
diff(cl)) == 0]
+  }
+  if (return.factor) {
+    return(cluster[s])
   }
-  if(return.factor) return(cluster[s])
   sort(s)
 }
 
-getcomp <- function(est, alpha=NULL) {
-  if(nlevels(est$cfactor) == 1) return(list(est=est,alpha=alpha))
+getcomp <- function(est, alpha = NULL) {
+  if (nlevels(est$cfactor) == 1) {
+    return(list(est = est, alpha = alpha))
+  }
   ok <- which(est$cfactor == 1)
   res <- est
   res$cfactor <- factor(res$cfactor[ok])
-  if(!is.null(res$X))
-    res$X <- res$X[ok,]
-  res$residuals <- res$residuals[ok,]
-  res$response <- res$response[ok,]
-  res$fitted.values <- res$fitted.values[ok,]
-  res$r.residuals <- res$r.residuals[ok,]
+  if (!is.null(res$X)) {
+    res$X <- res$X[ok, ]
+  }
+  res$residuals <- res$residuals[ok, ]
+  res$response <- res$response[ok, ]
+  res$fitted.values <- res$fitted.values[ok, ]
+  res$r.residuals <- res$r.residuals[ok, ]
   res$fe <- lapply(est$fe, function(f) factor(f[ok]))
-  if(alpha != NULL) {
+  if (!is.null(alpha)) {
     # of the two first factors, remove the components beyond the first
     # if there are more than two factors, the remaining are assumed to be ok
     # so find those with 'fe' among the two first, and comp > 1
-
-    bad <- which((alpha[,'fe'] %in% names(est$fe)[1:2]) & (alpha[,'comp'] > 1))
-    if(length(bad) > 0) alpha <- alpha[-bad,]
+
+    bad <- which((alpha[, "fe"] %in% names(est$fe)[1:2]) & (alpha[, "comp"] > 1))
+    if (length(bad) > 0) alpha <- alpha[-bad, ]
   }
-  return(est=res,alpha=alpha)
+  return(list(est = res, alpha = alpha))
 }
 
-#' Chain subset conditions 
+#' Chain subset conditions
 #'
-#' @param ... Logical conditions to be chained. 
+#' @param ... Logical conditions to be chained.
 #' @param out.vars character. Variables not in data.frame, only needed if you use variables which
-#' are not in the frame. If \code{out.vars} is not specified, it is assumed to match all variables
+#' are not in the frame. If `out.vars` is not specified, it is assumed to match all variables
 #' starting with a dot ('.').
-#' @return Expression that can be \code{eval}'ed to yield a logical subset mask. +#' @return Expression that can be `eval`'ed to yield a logical subset mask. #' @details #' A set of logical conditions are chained, not and'ed. That is, each argument to -#' \code{chainsubset} is used as a filter to create a smaller dataset. Each subsequent +#' `chainsubset` is used as a filter to create a smaller dataset. Each subsequent #' argument filters further. #' For independent conditions this will be the same as and'ing them. I.e. -#' \code{chainsubset(x < 0 , y < 0)} will yield the same subset as \code{(x < 0) & (y < 0)}. -#' However, for aggregate filters like \code{chainsubset(x < mean(y), x > mean(y))} -#' we first find all the observations with \code{x < mean(y)}, then among these we -#' find the ones with \code{x > mean(y)}. The last \code{mean(y)} is now conditional on -#' \code{x < mean(y)}. -#' +#' `chainsubset(x < 0 , y < 0)` will yield the same subset as `(x < 0) & (y < 0)`. +#' However, for aggregate filters like `chainsubset(x < mean(y), x > mean(y))` +#' we first find all the observations with `x < mean(y)`, then among these we +#' find the ones with `x > mean(y)`. The last `mean(y)` is now conditional on +#' `x < mean(y)`. 
+#' #' @examples #' set.seed(48) #' N <- 10000 -#' dat <- data.frame(y=rnorm(N), x=rnorm(N)) +#' dat <- data.frame(y = rnorm(N), x = rnorm(N)) #' # It's not the same as and'ing the conditions: -#' felm(y ~ x,data=dat,subset=chainsubset(x < mean(y), y < 2*mean(x))) -#' felm(y ~ x,data=dat,subset=chainsubset(y < 2*mean(x), x < mean(y))) -#' felm(y ~ x,data=dat,subset=(x < mean(y)) & (y < 2*mean(x))) -#' lm(y ~ x, data=dat, subset=chainsubset(x < mean(y), x > mean(y))) +#' felm(y ~ x, data = dat, subset = chainsubset(x < mean(y), y < 2 * mean(x))) +#' felm(y ~ x, data = dat, subset = chainsubset(y < 2 * mean(x), x < mean(y))) +#' felm(y ~ x, data = dat, subset = (x < mean(y)) & (y < 2 * mean(x))) +#' lm(y ~ x, data = dat, subset = chainsubset(x < mean(y), x > mean(y))) #' @note #' Some trickery is done to make this work directly in the subset argument of functions like -#' \code{felm()} and \code{lm()}. It might possibly fail with an error message in some situations. -#' If this happens, it should be done in two steps: \code{ss <- eval(chainsubset(...),data); -#' lm(...,data=data, subset=ss)}. In particular, the arguments are taken literally, -#' constructions like \code{function(...) {chainsubset(...)}} or \code{a <- quote(x < y); chainsubset(a)} do -#' not work, but \code{do.call(chainsubset,list(a))} does. +#' `felm()` and `lm()`. It might possibly fail with an error message in some situations. +#' If this happens, it should be done in two steps: `ss <- eval(chainsubset(...),data); +#' lm(...,data=data, subset=ss)`. In particular, the arguments are taken literally, +#' constructions like \code{function(...) {chainsubset(...)}} or `a <- quote(x < y); chainsubset(a)` do +#' not work, but `do.call(chainsubset,list(a))` does. 
#' @export chainsubset <- function(..., out.vars) { - if(sys.parent() != 0) { + if (sys.parent() != 0) { cl <- sys.call(sys.parent()) - mc <- match.call(expand.dots=TRUE) - if(cl[[1]] == quote(eval)) { + mc <- match.call(expand.dots = TRUE) + if (cl[[1]] == quote(eval)) { # we are called from eval, probably inside lm. let's replace it with a evalq and a direct call, then # evaluation of the result ecaller <- parent.frame(3) cl[[1]] <- quote(evalq) cl[[2]] <- match.call() - cl[[2]] <- eval(cl,ecaller) - return(eval(cl,ecaller)) + cl[[2]] <- eval(cl, ecaller) + return(eval(cl, ecaller)) } } - args <- as.list(match.call(expand.dots=TRUE))[-1] - args[['out.vars']] <- NULL - if(length(args) < 1) return(NULL) - if(length(args) == 1) return(structure(args[[1]],filter=args)) - group <- list(as.name('{')) + args <- as.list(match.call(expand.dots = TRUE))[-1] + args[["out.vars"]] <- NULL + if (length(args) < 1) { + return(NULL) + } + if (length(args) == 1) { + return(structure(args[[1]], filter = args)) + } + group <- list(as.name("{")) miss <- missing(out.vars) - R <- as.name(sprintf('R%x',RR <- sample(.Machine$integer.max,1))) - FF <- as.name(sprintf('F%x',RR)) + R <- as.name(sprintf("R%x", RR <- sample(.Machine$integer.max, 1))) + FF <- as.name(sprintf("F%x", RR)) structure( - bquote(local({ - .(R) <- logical(length(.(FF) <- .(args[[1]]))) - .(R)[.(Reduce(function(f1,f2) { - nm <- all.vars(f2) - nm <- lapply(if(miss) - grep('^\\.',nm,value=TRUE,invert=TRUE) - else - nm[!(nm %in% out.vars)],as.name) - recod <- as.call(c(group, lapply(nm, function(n) bquote(.(n) <- .(n)[.(FF), drop=TRUE])))) - bquote({.(FF) <- .(f1); local({.(recod); .(FF)[.(f2)]})}) - }, args[-1],init=bquote(base::which(.(FF)))))] <- TRUE - .(R) - })), - filter=args) + bquote(local({ + .(R) <- logical(length(.(FF) <- .(args[[1]]))) + .(R)[.(Reduce(function(f1, f2) { + nm <- all.vars(f2) + nm <- lapply(if (miss) { + grep("^\\.", nm, value = TRUE, invert = TRUE) + } else { + nm[!(nm %in% out.vars)] + }, 
as.name) + recod <- as.call(c(group, lapply(nm, function(n) bquote(.(n) <- .(n)[.(FF), drop = TRUE])))) + bquote({ + .(FF) <- .(f1) + local({ + .(recod) + .(FF)[.(f2)] + }) + }) + }, args[-1], init = bquote(base::which(.(FF)))))] <- TRUE + .(R) + })), + filter = args + ) } diff --git a/R/waldtest.R b/R/waldtest.R index 0da3133..42a9ada 100644 --- a/R/waldtest.R +++ b/R/waldtest.R @@ -1,38 +1,38 @@ #' Compute Wald test for joint restrictions on coefficients -#' +#' #' Compute a Wald test for a linear hypothesis on the coefficients. Also #' supports Delta-approximation for non-linear hypotheses. -#' -#' The function \code{waldtest} computes a Wald test for the H0: R beta = r, -#' where beta is the estimated vector \code{coef(object)}. -#' -#' If \code{R} is a character, integer, or logical vector it is assumed to +#' +#' The function `waldtest` computes a Wald test for the H0: R beta = r, +#' where beta is the estimated vector `coef(object)`. +#' +#' If `R` is a character, integer, or logical vector it is assumed to #' specify a matrix which merely picks out a subset of the coefficients for -#' joint testing. If \code{r} is not specified, it is assumed to be a zero +#' joint testing. If `r` is not specified, it is assumed to be a zero #' vector of the appropriate length. -#' -#' \code{R} can also be a formula which is linear in the estimated -#' coefficients, e.g. of the type \code{~Q-2|x-2*z} which will test the joint +#' +#' `R` can also be a formula which is linear in the estimated +#' coefficients, e.g. of the type `~Q-2|x-2*z` which will test the joint #' hypothesis Q=2 and x=2*z. -#' -#' If \code{R} is a function (of the coefficients), an approximate Wald test -#' against H0: \code{R(beta) == 0}, using the Delta-method, is computed. -#' +#' +#' If `R` is a function (of the coefficients), an approximate Wald test +#' against H0: `R(beta) == 0`, using the Delta-method, is computed. 
+#' #' In case of an IV-estimation, the names for the endogenous variables in -#' \code{coef(object)} are of the type \code{"`Q(fit)`"} which is a bit dull to +#' `coef(object)` are of the type `"`Q(fit)`"` which is a bit dull to #' type; if all the endogenous variables are to be tested they can be specified -#' as \code{"endovars"}. It is also possible to specify an endogenous variable -#' simply as \code{"Q"}, and \code{waldtest} will add the other syntactic sugar -#' to obtain \code{"`Q(fit)`"}. -#' -#' The \code{type} argument works as follows. If \code{type=='default'} it is +#' as `"endovars"`. It is also possible to specify an endogenous variable +#' simply as `"Q"`, and `waldtest` will add the other syntactic sugar +#' to obtain `"`Q(fit)`"`. +#' +#' The `type` argument works as follows. If `type=='default'` it is #' assumed that the residuals are i.i.d., unless a cluster structure was -#' specified to \code{\link{felm}}. If \code{type=='robust'}, a heteroscedastic +#' specified to [felm()]. If `type=='robust'`, a heteroscedastic #' structure is assumed, even if a cluster structure was specified in -#' \code{\link{felm}}. -#' -#' @param object object of class \code{"felm"}, a result of a call to -#' \code{\link{felm}}. +#' [felm()]. +#' +#' @param object object of class `"felm"`, a result of a call to +#' [felm()]. #' @param R matrix, character, formula, function, integer or logical. #' Specification of which exclusions to test. #' @param r numerical vector. @@ -40,33 +40,33 @@ #' @param lhs character. Name of left hand side if multiple left hand sides. #' @param df1 integer. If you know better than the default df, specify it here. #' @param df2 integer. If you know better than the default df, specify it here. -#' @return The function \code{waldtest} computes and returns a named numeric +#' @return The function `waldtest` computes and returns a named numeric #' vector containing the following elements. 
-#' -#' \itemize{ \item \code{p} is the p-value for the Chi^2-test \item \code{chi2} -#' is the Chi^2-distributed statistic. \item \code{df1} is the degrees of -#' freedom for the Chi^2 statistic. \item \code{p.F} is the p-value for the F -#' statistics \item \code{F} is the F-distributed statistic. \item \code{df2} +#' +#' \itemize{ \item `p` is the p-value for the Chi^2-test \item `chi2` +#' is the Chi^2-distributed statistic. \item `df1` is the degrees of +#' freedom for the Chi^2 statistic. \item `p.F` is the p-value for the F +#' statistics \item `F` is the F-distributed statistic. \item `df2` #' is the additional degrees of freedom for the F statistic. } -#' -#' The return value has an attribute \code{'formula'} which encodes the +#' +#' The return value has an attribute `'formula'` which encodes the #' restrictions. -#' @seealso \code{\link{nlexpect}} +#' @seealso [nlexpect()] #' @examples -#' +#' #' x <- rnorm(10000) #' x2 <- rnorm(length(x)) -#' y <- x - 0.2*x2 + rnorm(length(x)) -#' #Also works for lm -#' summary(est <- lm(y ~ x + x2 )) +#' y <- x - 0.2 * x2 + rnorm(length(x)) +#' # Also works for lm +#' summary(est <- lm(y ~ x + x2)) #' # We do not reject the true values -#' waldtest(est, ~ x-1|x2+0.2|`(Intercept)`) +#' waldtest(est, ~ x - 1 | x2 + 0.2 | `(Intercept)`) #' # The Delta-method coincides when the function is linear: #' waldtest(est, function(x) x - c(0, 1, -0.2)) -#' +#' #' @export waldtest -waldtest <- function(object, R, r, type=c('default','iid','robust','cluster'), lhs=NULL, df1, df2) { - if(inherits(object,'felm') && object$nostats) stop('No Wald test for objects created with felm(nostats=TRUE)') +waldtest <- function(object, R, r, type = c("default", "iid", "robust", "cluster"), lhs = NULL, df1, df2) { + if (inherits(object, "felm") && object$nostats) stop("No Wald test for objects created with felm(nostats=TRUE)") # We make a chi^2 to test whether the equation R theta = r holds. 
# The chi^2 is computed according to Wooldridge (5.34, 10.59). @@ -75,171 +75,193 @@ waldtest <- function(object, R, r, type=c('default','iid','robust','cluster'), l # and V is th covariance matrix. # First, find V. It's in either object$vcv, object$robustvcv or object$clustervcv - if(is.null(lhs) && length(object$lhs) > 1) { - stop('Please specify lhs=[one of ',paste(object$lhs, collapse=','),']') + if (is.null(lhs) && length(object$lhs) > 1) { + stop("Please specify lhs=[one of ", paste(object$lhs, collapse = ","), "]") + } + if (!is.null(lhs) && is.na(match(lhs, object$lhs))) { + stop("Please specify lhs=[one of ", paste(object$lhs, collapse = ","), "]") } - if(!is.null(lhs) && is.na(match(lhs, object$lhs))) - stop('Please specify lhs=[one of ',paste(object$lhs, collapse=','),']') type <- type[1] - if(identical(type,'default')) { - if(is.null(object$clustervar)) - V <- vcov(object, type='iid', lhs=lhs) - else - V <- vcov(object, type='cluster', lhs=lhs) - } else - V <- vcov(object, type=type, lhs=lhs) + if (identical(type, "default")) { + if (is.null(object$clustervar)) { + V <- vcov(object, type = "iid", lhs = lhs) + } else { + V <- vcov(object, type = "cluster", lhs = lhs) + } + } else { + V <- vcov(object, type = type, lhs = lhs) + } -# if(is.null(lhs) && length(object$lhs) == 1) lhs <- object$lhs + # if(is.null(lhs) && length(object$lhs) == 1) lhs <- object$lhs cf <- coef(object) - if(is.matrix(cf)) - nmc <- rownames(cf) - else - nmc <- names(cf) + if (is.matrix(cf)) { + nmc <- rownames(cf) + } else { + nmc <- names(cf) + } - if(inherits(R,'formula') || is.call(R) || is.name(R)) { + if (inherits(R, "formula") || is.call(R) || is.name(R)) { Rr <- formtoR(R, nmc) - R <- Rr[,-ncol(Rr), drop=FALSE] - r <- Rr[,ncol(Rr)] - } else if(is.function(R)) { + R <- Rr[, -ncol(Rr), drop = FALSE] + r <- Rr[, ncol(Rr)] + } else if (is.function(R)) { # non-linear stuff. 
Compute value and gradient of R - if(!requireNamespace('numDeriv', quietly=TRUE)) {warning("package numDeriv must be available to use non-linear Wald test"); return(NULL)} - pt <- coef(object,lhs=lhs) + if (!requireNamespace("numDeriv", quietly = TRUE)) { + warning("package numDeriv must be available to use non-linear Wald test") + return(NULL) + } + pt <- coef(object, lhs = lhs) pt[is.na(pt)] <- 0 val <- R(pt) - if(is.null(dim(val))) dim(val) <- c(length(val), 1) - gr <- numDeriv::jacobian(R,pt) - if(is.null(dim(gr))) dim(gr) <- c(1,length(gr)) - } else if(!is.matrix(R)) { + if (is.null(dim(val))) dim(val) <- c(length(val), 1) + gr <- numDeriv::jacobian(R, pt) + if (is.null(dim(gr))) dim(gr) <- c(1, length(gr)) + } else if (!is.matrix(R)) { # it's not a matrix, so it's a list of parameters, either # names, logicals or indices - if(is.null(R)) R <- nmc - if(is.character(R)) { - ev <- match('endovars', R) - if(!is.na(ev)) { + if (is.null(R)) R <- nmc + if (is.character(R)) { + ev <- match("endovars", R) + if (!is.na(ev)) { # replace with endogenous variables - R <- c(R[-ev],object$endovars) + R <- c(R[-ev], object$endovars) } # did user specify any of the endogenous variables? - fitvars <- paste('`',R,'(fit)`',sep='') - fitpos <- match(fitvars,nmc) + fitvars <- paste("`", R, "(fit)`", sep = "") + fitpos <- match(fitvars, nmc) # replace those which are not NA noNA <- which(!is.na(fitpos)) R[noNA] <- fitvars[noNA] Ri <- match(R, nmc) - if(anyNA(Ri)) stop("Couldn't find variables ",paste(R[is.na(Ri)],collapse=',')) + if (anyNA(Ri)) stop("Couldn't find variables ", paste(R[is.na(Ri)], collapse = ",")) R <- Ri - } else if(is.logical(R)) { + } else if (is.logical(R)) { R <- which(R) } # here R is a list of positions of coefficients # make the projection matrix. 
- RR <- matrix(0,length(R),length(coef(object,lhs=lhs))) - for(i in seq_along(R)) { - RR[i,R[i]] <- 1 + RR <- matrix(0, length(R), length(coef(object, lhs = lhs))) + for (i in seq_along(R)) { + RR[i, R[i]] <- 1 } R <- RR - } + } # Two cases here. If R is a function, we do a non-linear delta test against 0, otherwise # we do the ordinary Wald test - if(is.function(R)) { + if (is.function(R)) { W <- as.numeric(t(val) %*% solve(gr %*% V %*% t(gr)) %*% val) - if(missing(df1)) df1 <- length(val) + if (missing(df1)) df1 <- length(val) } else { - if(missing(r) || is.null(r)) - r <- rep(0,nrow(R)) - else if(length(r) != nrow(R)) stop('nrow(R) != length(r)') - cf <- coef(object, lhs=lhs) + if (missing(r) || is.null(r)) { + r <- rep(0, nrow(R)) + } else if (length(r) != nrow(R)) stop("nrow(R) != length(r)") + cf <- coef(object, lhs = lhs) cf[is.na(cf)] <- 0 beta <- R %*% cf - r - V[is.na(V)] <- 0 # ignore NAs - W <- try(sum(beta * solve(R %*% V %*% t(R),beta)), silent=TRUE) - - if(inherits(W,'try-error')) - W <- as.numeric(t(beta) %*% pinvx(R %*% V %*% t(R)) %*% beta) + V[is.na(V)] <- 0 # ignore NAs + W <- try(sum(beta * solve(R %*% V %*% t(R), beta)), silent = TRUE) + + if (inherits(W, "try-error")) { + W <- as.numeric(t(beta) %*% pinvx(R %*% V %*% t(R)) %*% beta) + } } # W follows a chi2(Q) distribution, but the F-test has another # df which is ordinarily object$df. 
However, if there are clusters # the df should be reduced to the number of clusters-1 - if(missing(df2)) { + if (missing(df2)) { df2 <- object$df - if((!is.null(object$clustervar) && type %in% c('default','cluster')) ) { - df2 <- min(nlevels(object$clustervar[[1]])-1, df2) + if ((!is.null(object$clustervar) && type %in% c("default", "cluster"))) { + min_clustvar <- min(vapply(seq_along(object$clustervar), + function(i) nlevels(object$clustervar[[i]]), + FUN.VALUE = integer(1) + )) + df2 <- min(min_clustvar - 1, df2) } } - if(missing(df1)) - df1 <- length(beta) + if (missing(df1)) { + df1 <- length(beta) + } - F <- W/df1 + F <- W / df1 # F follows a F(df1,df2) distribution - if(is.function(R)) frm <- R else frm <- Rtoform(R,r,nmc) - - structure(c(p=pchisq(W, df1, lower.tail=FALSE), chi2=W, df1=df1, - p.F=pf(F,df1,df2, lower.tail=FALSE), F=F, df2=df2), - formula=frm) + if (is.function(R)) frm <- R else frm <- Rtoform(R, r, nmc) + + structure( + c( + p = pchisq(W, df1, lower.tail = FALSE), chi2 = W, df1 = df1, + p.F = pf(F, df1, df2, lower.tail = FALSE), F = F, df2 = df2 + ), + formula = frm + ) } # convert a formula which is a set of linear combinations like ~x+x3 | x2-x4+3 to # matrices R and r such that R %*% coefs = r # the vector r is return as the last column of the result formtoR <- function(formula, coefs) { - conv <- function(f) formtoR(f, coefs) lf <- as.list(formula) - if(lf[[1]] == as.name('~') || lf[[1]] == as.name('quote')) return(conv(lf[[2]])) + if (lf[[1]] == as.name("~") || lf[[1]] == as.name("quote")) { + return(conv(lf[[2]])) + } # here we have a single formula w/o '~' in front, e.g. 
x+x3|x2-x4, or just x+x3 # split off parts '|' in a loop R <- NULL -# if(length(lf) != 1) stop('length of ',lf, ' is != 1') -# lf <- as.list(lf[[1]]) + # if(length(lf) != 1) stop('length of ',lf, ' is != 1') + # lf <- as.list(lf[[1]]) op <- lf[[1]] - if(op == as.name('|')) { + if (op == as.name("|")) { return(rbind(conv(lf[[2]]), conv(lf[[3]]))) - } else if(op == as.name('+')) { - if(length(lf) == 2) return(conv(lf[[2]])) # unary + + } else if (op == as.name("+")) { + if (length(lf) == 2) { + return(conv(lf[[2]])) + } # unary + return(conv(lf[[2]]) + conv(lf[[3]])) - } else if(op == as.name('-')) { - if(length(lf) == 2) return(-conv(lf[[2]])) # unary - - return(conv(lf[[2]]) - conv(lf[[3]])) - } else if(op == as.name('*')) { + } else if (op == as.name("-")) { + if (length(lf) == 2) { + return(-conv(lf[[2]])) + } # unary - + return(conv(lf[[2]]) - conv(lf[[3]])) + } else if (op == as.name("*")) { f1 <- conv(lf[[2]]) f2 <- conv(lf[[3]]) # the first one must be a numeric, i.e. only last column filled in # and it's negative fac <- -f1[length(f1)] - return(fac * conv(lf[[3]])) - } else if(is.name(op)) { - res <- matrix(0,1,length(coefs)+1) + return(fac * conv(lf[[3]])) + } else if (is.name(op)) { + res <- matrix(0, 1, length(coefs) + 1) pos <- match(as.character(op), coefs) - if(is.na(pos)) { - ivspec <- paste("`",as.character(op),"(fit)`", sep='') + if (is.na(pos)) { + ivspec <- paste("`", as.character(op), "(fit)`", sep = "") pos <- match(ivspec, coefs) } - if(is.na(pos)) stop("Can't find ", op, " among coefficients ", paste(coefs, collapse=',')) + if (is.na(pos)) stop("Can't find ", op, " among coefficients ", paste(coefs, collapse = ",")) res[pos] <- 1 return(res) - } else if(is.numeric(op)) { - return(matrix(c(rep(0,length(coefs)), -op), 1)) + } else if (is.numeric(op)) { + return(matrix(c(rep(0, length(coefs)), -op), 1)) } else { - stop('Unkwnown item ',as.character(op), ' in formula ',formula) + stop("Unkwnown item ", as.character(op), " in formula ", formula) } 
} -Rtoform <- function(R,r, coefs) { - coefs <- gsub('`','',coefs,fixed=TRUE) - form <- paste('~',paste(apply(R, 1, function(row) { +Rtoform <- function(R, r, coefs) { + coefs <- gsub("`", "", coefs, fixed = TRUE) + form <- paste("~", paste(apply(R, 1, function(row) { w <- which(row != 0) - rw <- paste(' ', row[w], '*`', coefs[w], '`', collapse=' + ', sep='') - rw <- gsub('+ -',' - ',rw, fixed=TRUE) - rw <- gsub(' 1*','',rw, fixed=TRUE) - rw <- gsub('(fit)','',rw, fixed=TRUE) + rw <- paste(" ", row[w], "*`", coefs[w], "`", collapse = " + ", sep = "") + rw <- gsub("+ -", " - ", rw, fixed = TRUE) + rw <- gsub(" 1*", "", rw, fixed = TRUE) + rw <- gsub("(fit)", "", rw, fixed = TRUE) rw - }), ' + ', -r, collapse='|', sep='')) - form <- gsub('+ -','-',form, fixed=TRUE) - form <- gsub(' 0.',' .',form, fixed=TRUE) - form <- gsub('+ 0','',form, fixed=TRUE) + }), " + ", -r, collapse = "|", sep = "")) + form <- gsub("+ -", "-", form, fixed = TRUE) + form <- gsub(" 0.", " .", form, fixed = TRUE) + form <- gsub("+ 0", "", form, fixed = TRUE) local(as.formula(form)) } diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..dfd38cc --- /dev/null +++ b/README.Rmd @@ -0,0 +1,78 @@ +--- +output: github_document +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) +``` + +# lfe + + +[![R-CMD-check](https://github.com/MatthieuStigler/lfe/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/MatthieuStigler/lfe/actions/workflows/R-CMD-check.yaml) +[![R-CMD-check](https://github.com/pachadotdev/lfe/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/pachadotdev/lfe/actions/workflows/R-CMD-check.yaml) + + +The goal of lfe is to speed up the estimation of linear models with large fixed +effects. 
It includes support for instrumental variables, conditional F
+statistics for weak instruments, robust and multi-way clustered standard errors,
+as well as limited mobility bias correction. See Gaure (2013)
+ and Gaure 2014 .
+
+## Installation
+
+You can install the development version of lfe like so:
+
+``` r
+remotes::install_github("MatthieuStigler/lfe")
+```
+
+## Example
+
+This is a basic example which shows you the speed improvement over base R
+for fixed effects estimation.
+
+```{r example, eval = FALSE}
+library(lfe) # fixed effects estimation
+library(tradepolicy) # intl trade data
+library(dplyr) # data cleaning/transforming
+
+training_data <- agtpa_applications %>%
+  mutate(
+    log_trade = log(trade),
+    log_dist = log(dist),
+    exp_year = paste(exporter, year, sep = "_"),
+    imp_year = paste(importer, year, sep = "_")
+  ) %>%
+  filter(trade > 0, exporter != importer, year %in% seq(1986, 2006, 4)) %>%
+  select(year, log_trade, log_dist, cntg, lang, clny, rta, exp_year, imp_year)
+
+# note the difference with the | operator to indicate the FEs
+# this is just an example, here I am not estimating a PPML model or anything
+# in the state of the art
+fml1 <- 0 + log_trade ~
+  log_dist + cntg + lang + clny + rta + exp_year + imp_year # base
+
+fml2 <- log_trade ~
+  log_dist + cntg + lang + clny + rta | exp_year + imp_year # lfe
+
+lm(fml1, data = training_data)
+
+felm(fml2, data = training_data)
+```
+
+## Testing
+
+For a complete test with `devtools::check()`, you need to run
+`sudo apt-get install devtools` or similar before. The package is written in C, in the
+future I shall try to rewrite it in C++ to ease long term maintenance.
+
+The package also needs additional testing. At the present time, the tests
+cover around 30% of the written lines.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8e649da
--- /dev/null
+++ b/README.md
@@ -0,0 +1,68 @@
+
+
+
+# lfe
+
+
+
+[![R-CMD-check](https://github.com/MatthieuStigler/lfe/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/MatthieuStigler/lfe/actions/workflows/R-CMD-check.yaml)
+
+
+The goal of lfe is to speed up the estimation of linear models with
+large fixed effects. It includes support for instrumental variables,
+conditional F statistics for weak instruments, robust and multi-way
+clustered standard errors, as well as limited mobility bias correction.
+See Gaure (2013) and Gaure 2014
+.
+
+## Installation
+
+You can install the development version of lfe like so:
+
+``` r
+remotes::install_github("MatthieuStigler/lfe")
+```
+
+## Example
+
+This is a basic example which shows you the speed improvement over base
+R for fixed effects estimation.
+
+``` r
+library(lfe) # fixed effects estimation
+library(tradepolicy) # intl trade data
+library(dplyr) # data cleaning/transforming
+
+training_data <- agtpa_applications %>%
+  mutate(
+    log_trade = log(trade),
+    log_dist = log(dist),
+    exp_year = paste(exporter, year, sep = "_"),
+    imp_year = paste(importer, year, sep = "_")
+  ) %>%
+  filter(trade > 0, exporter != importer, year %in% seq(1986, 2006, 4)) %>%
+  select(year, log_trade, log_dist, cntg, lang, clny, rta, exp_year, imp_year)
+
+# note the difference with the | operator to indicate the FEs
+# this is just an example, here I am not estimating a PPML model or anything
+# in the state of the art
+fml1 <- 0 + log_trade ~
+  log_dist + cntg + lang + clny + rta + exp_year + imp_year # base
+
+fml2 <- log_trade ~
+  log_dist + cntg + lang + clny + rta | exp_year + imp_year # lfe
+
+lm(fml1, data = training_data)
+
+felm(fml2, data = training_data)
+```
+
+## Testing
+
+For a complete test with `devtools::check()`, you need to run
+`sudo apt-get install devtools` or similar before.
The package is written in C,
+in the future I shall try to rewrite it in C++ to ease long term
+maintenance.
+
+The package also needs additional testing. At the present time, the tests
+cover around 30% of the written lines.
diff --git a/benchmarks/fitting_times.rds b/benchmarks/fitting_times.rds
new file mode 100644
index 0000000..267c72c
Binary files /dev/null and b/benchmarks/fitting_times.rds differ
diff --git a/configure b/configure
index 5d88184..501b850 100755
--- a/configure
+++ b/configure
@@ -1,9 +1,10 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69.
+# Generated by GNU Autoconf 2.71.
 #
 #
-# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
+# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
+# Inc.
 #
 #
 # This configure script is free software; the Free Software Foundation
@@ -14,14 +15,16 @@
 # Be more Bourne compatible
 DUALCASE=1; export DUALCASE # for MKS sh
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+as_nop=:
+if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1
+then :
   emulate sh
   NULLCMD=:
   # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
   # is contrary to our usage. Disable this feature.
   alias -g '${1+"$@"}'='"$@"'
   setopt NO_GLOB_SUBST
-else
+else $as_nop
   case `(set -o) 2>/dev/null` in #(
   *posix*) :
     set -o posix ;; #(
@@ -31,46 +34,46 @@ esac
 fi
+
+# Reset variables that may have inherited troublesome values from
+# the environment.
+
+# IFS needs to be set, to space, tab, and newline, in precisely that order.
+# (If _AS_PATH_WALK were called with IFS unset, it would have the
+# side effect of setting IFS to empty, thus disabling word splitting.)
+# Quoting is to prevent editors from complaining about space-tab.
 as_nl='
'
 export as_nl
-# Printing a long string crashes Solaris 7 /usr/bin/printf.
-as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. -if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. 
+if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then +if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || @@ -79,13 +82,6 @@ if test "${PATH_SEPARATOR+set}" != set; then fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( @@ -94,8 +90,12 @@ case $0 in #(( for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS @@ -107,30 +107,10 @@ if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. 
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. @@ -152,20 +132,22 @@ esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 -as_fn_exit 255 +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then - as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + as_bourne_compatible="as_nop=: +if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST -else +else \$as_nop case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( @@ -185,42 +167,53 @@ as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } -if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : +if ( set x; as_fn_ret_success y && test x = \"\$1\" ) +then : -else +else \$as_nop exitcode=1; echo positional parameters were not saved. 
fi test x\$exitcode = x0 || exit 1 +blah=\$(echo \$(echo blah)) +test x\"\$blah\" = xblah || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 test \$(( 1 + 1 )) = 2 || exit 1" - if (eval "$as_required") 2>/dev/null; then : + if (eval "$as_required") 2>/dev/null +then : as_have_required=yes -else +else $as_nop as_have_required=no fi - if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null +then : -else +else $as_nop as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. 
- as_shell=$as_dir/$as_base + as_shell=$as_dir$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : CONFIG_SHELL=$as_shell as_have_required=yes - if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null +then : break 2 fi fi @@ -228,14 +221,21 @@ fi esac as_found=false done -$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : - CONFIG_SHELL=$SHELL as_have_required=yes -fi; } IFS=$as_save_IFS +if $as_found +then : + +else $as_nop + if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi +fi - if test "x$CONFIG_SHELL" != x; then : + if test "x$CONFIG_SHELL" != x +then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also @@ -253,18 +253,19 @@ esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi - if test x$as_have_required = xno; then : - $as_echo "$0: This script requires a shell more modern than all" - $as_echo "$0: the shells that I found on your system." - if test x${ZSH_VERSION+set} = xset ; then - $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" - $as_echo "$0: be upgraded to zsh 4.3.4 or later." 
+ if test x$as_have_required = xno +then : + printf "%s\n" "$0: This script requires a shell more modern than all" + printf "%s\n" "$0: the shells that I found on your system." + if test ${ZSH_VERSION+y} ; then + printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" + printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." else - $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, + printf "%s\n" "$0: Please tell bug-autoconf@gnu.org about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." @@ -291,6 +292,7 @@ as_fn_unset () } as_unset=as_fn_unset + # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -308,6 +310,14 @@ as_fn_exit () as_fn_set_status $1 exit $1 } # as_fn_exit +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_mkdir_p # ------------- @@ -322,7 +332,7 @@ as_fn_mkdir_p () as_dirs= while :; do case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" @@ -331,7 +341,7 @@ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | +printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -370,12 +380,13 @@ as_fn_executable_p () # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. 
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : eval 'as_fn_append () { eval $1+=\$2 }' -else +else $as_nop as_fn_append () { eval $1=\$$1\$2 @@ -387,18 +398,27 @@ fi # as_fn_append # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : eval 'as_fn_arith () { as_val=$(( $* )) }' -else +else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- @@ -410,9 +430,9 @@ as_fn_error () as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $2" >&2 + printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error @@ -439,7 +459,7 @@ as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X/"$0" | +printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q @@ -483,7 +503,7 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || - { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall @@ -497,6 +517,10 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits exit } + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) @@ -510,6 +534,13 @@ case `echo -n x` in #((((( ECHO_N='-n';; esac +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + + rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -575,12 +606,12 @@ MFLAGS= MAKEFLAGS= # Identity of this package. -PACKAGE_NAME= -PACKAGE_TARNAME= -PACKAGE_VERSION= -PACKAGE_STRING= -PACKAGE_BUGREPORT= -PACKAGE_URL= +PACKAGE_NAME='' +PACKAGE_TARNAME='' +PACKAGE_VERSION='' +PACKAGE_STRING='' +PACKAGE_BUGREPORT='' +PACKAGE_URL='' ac_unique_file="lfe" ac_unique_file="src/lfe.c" @@ -589,6 +620,7 @@ LIBOBJS HAVE_THREADNAME PTHREAD_CFLAGS PTHREAD_LIBS +PTHREAD_CXX PTHREAD_CC ax_pthread_config EGREP @@ -734,8 +766,6 @@ do *) ac_optarg=yes ;; esac - # Accept the important Cygnus configure options, so we can diagnose typos. 
- case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; @@ -776,9 +806,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" + as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" @@ -802,9 +832,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" + as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" @@ -1015,9 +1045,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" + as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" @@ -1031,9 +1061,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" + as_fn_error $? 
"invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" @@ -1077,9 +1107,9 @@ Try \`$0 --help' for more information" *) # FIXME: should be removed in autoconf 3.0. - $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && - $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; @@ -1095,7 +1125,7 @@ if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; - *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi @@ -1159,7 +1189,7 @@ $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_myself" | +printf "%s\n" X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -1321,9 +1351,9 @@ if test "$ac_init_help" = "recursive"; then case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. 
ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; @@ -1351,7 +1381,8 @@ esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } - # Check for guested configure. + # Check for configure.gnu first; this name is used for a wrapper for + # Metaconfig's "Configure" on case-insensitive file systems. if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive @@ -1359,7 +1390,7 @@ ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix echo && $SHELL "$ac_srcdir/configure" --help=recursive else - $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done @@ -1369,9 +1400,9 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF configure -generated by GNU Autoconf 2.69 +generated by GNU Autoconf 2.71 -Copyright (C) 2012 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF @@ -1388,14 +1419,14 @@ fi ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext + rm -f conftest.$ac_objext conftest.beam if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1403,14 +1434,15 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err - } && test -s conftest.$ac_objext; then : + } && test -s conftest.$ac_objext +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1426,14 +1458,14 @@ fi ac_fn_c_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest$ac_exeext + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1441,17 +1473,18 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext - }; then : + } +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1478,7 +1511,7 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1486,14 +1519,15 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } > conftest.i && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err - }; then : + } +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1505,8 +1539,8 @@ fi # ac_fn_c_try_run LINENO # ---------------------- -# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes -# that executables *can* be run. +# Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that +# executables *can* be run. ac_fn_c_try_run () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack @@ -1516,25 +1550,26 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } +then : ac_retval=0 -else - $as_echo "$as_me: program exited with status $ac_status" >&5 - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: program exited with status $ac_status" >&5 + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=$ac_status @@ -1544,14 +1579,34 @@ fi as_fn_set_status $ac_retval } # ac_fn_c_try_run +ac_configure_args_raw= +for ac_arg +do + case $ac_arg in + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append ac_configure_args_raw " '$ac_arg'" +done + +case $ac_configure_args_raw in + *$as_nl*) + ac_safe_unquote= ;; + *) + ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. + ac_unsafe_a="$ac_unsafe_z#~" + ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" + ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; +esac + cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by $as_me, which was -generated by GNU Autoconf 2.69. Invocation command line was +generated by GNU Autoconf 2.71. Invocation command line was - $ $0 $@ + $ $0$ac_configure_args_raw _ACEOF exec 5>>config.log @@ -1584,8 +1639,12 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- $as_echo "PATH: $as_dir" + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + printf "%s\n" "PATH: $as_dir" done IFS=$as_save_IFS @@ -1620,7 +1679,7 @@ do | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) - ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; @@ -1655,11 +1714,13 @@ done # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? + # Sanitize IFS. + IFS=" "" $as_nl" # Save into config.log some information that might help in debugging. { echo - $as_echo "## ---------------- ## + printf "%s\n" "## ---------------- ## ## Cache variables. ## ## ---------------- ##" echo @@ -1670,8 +1731,8 @@ trap 'exit_status=$? case $ac_val in #( *${as_nl}*) case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( @@ -1695,7 +1756,7 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; ) echo - $as_echo "## ----------------- ## + printf "%s\n" "## ----------------- ## ## Output variables. 
## ## ----------------- ##" echo @@ -1703,14 +1764,14 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; do eval ac_val=\$$ac_var case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac - $as_echo "$ac_var='\''$ac_val'\''" + printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then - $as_echo "## ------------------- ## + printf "%s\n" "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo @@ -1718,15 +1779,15 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; do eval ac_val=\$$ac_var case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac - $as_echo "$ac_var='\''$ac_val'\''" + printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then - $as_echo "## ----------- ## + printf "%s\n" "## ----------- ## ## confdefs.h. ## ## ----------- ##" echo @@ -1734,8 +1795,8 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; echo fi test "$ac_signal" != 0 && - $as_echo "$as_me: caught signal $ac_signal" - $as_echo "$as_me: exit $exit_status" + printf "%s\n" "$as_me: caught signal $ac_signal" + printf "%s\n" "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && @@ -1749,63 +1810,48 @@ ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h -$as_echo "/* confdefs.h */" > confdefs.h +printf "%s\n" "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. 
-cat >>confdefs.h <<_ACEOF -#define PACKAGE_NAME "$PACKAGE_NAME" -_ACEOF +printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_TARNAME "$PACKAGE_TARNAME" -_ACEOF +printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_VERSION "$PACKAGE_VERSION" -_ACEOF +printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_STRING "$PACKAGE_STRING" -_ACEOF +printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" -_ACEOF +printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_URL "$PACKAGE_URL" -_ACEOF +printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. -ac_site_file1=NONE -ac_site_file2=NONE if test -n "$CONFIG_SITE"; then - # We do not want a PATH search for config.site. 
- case $CONFIG_SITE in #(( - -*) ac_site_file1=./$CONFIG_SITE;; - */*) ac_site_file1=$CONFIG_SITE;; - *) ac_site_file1=./$CONFIG_SITE;; - esac + ac_site_files="$CONFIG_SITE" elif test "x$prefix" != xNONE; then - ac_site_file1=$prefix/share/config.site - ac_site_file2=$prefix/etc/config.site + ac_site_files="$prefix/share/config.site $prefix/etc/config.site" else - ac_site_file1=$ac_default_prefix/share/config.site - ac_site_file2=$ac_default_prefix/etc/config.site + ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi -for ac_site_file in "$ac_site_file1" "$ac_site_file2" + +for ac_site_file in $ac_site_files do - test "x$ac_site_file" = xNONE && continue - if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 -$as_echo "$as_me: loading site script $ac_site_file" >&6;} + case $ac_site_file in #( + */*) : + ;; #( + *) : + ac_site_file=./$ac_site_file ;; +esac + if test -f "$ac_site_file" && test -r "$ac_site_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" \ - || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi @@ -1815,19 +1861,425 @@ if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. 
if test /dev/null != "$cache_file" && test -f "$cache_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 -$as_echo "$as_me: loading cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +printf "%s\n" "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 -$as_echo "$as_me: creating cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +printf "%s\n" "$as_me: creating cache $cache_file" >&6;} >$cache_file fi +# Test code for whether the C compiler supports C89 (global declarations) +ac_c_conftest_c89_globals=' +/* Does the compiler advertise C89 conformance? + Do not test the value of __STDC__, because some compilers set it to 0 + while being otherwise adequately conformant. */ +#if !defined __STDC__ +# error "Compiler does not advertise C89 conformance" +#endif + +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ +struct buf { int x; }; +struct buf * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not \xHH hex character constants. + These do not provoke an error unfortunately, instead are silently treated + as an "x". The following induces an error, until -std is added to get + proper ANSI mode. Curiously \x00 != x always comes out true, for an + array size at least. It is necessary to write \x00 == 0 to get something + that is true only with -std. */ +int osf4_cc_array ['\''\x00'\'' == 0 ? 
1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) '\''x'\'' +int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int), + int, int);' + +# Test code for whether the C compiler supports C89 (body of main). +ac_c_conftest_c89_main=' +ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); +' + +# Test code for whether the C compiler supports C99 (global declarations) +ac_c_conftest_c99_globals=' +// Does the compiler advertise C99 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L +# error "Compiler does not advertise C99 conformance" +#endif + +#include +extern int puts (const char *); +extern int printf (const char *, ...); +extern int dprintf (int, const char *, ...); +extern void *malloc (size_t); + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +// dprintf is used instead of fprintf to avoid needing to declare +// FILE and stderr. +#define debug(...) dprintf (2, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. 
+#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + #error "your preprocessor is broken" +#endif +#if BIG_OK +#else + #error "your preprocessor is broken" +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // See if C++-style comments work. + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. +static bool +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str = ""; + int number = 0; + float fnumber = 0; + + while (*format) + { + switch (*format++) + { + case '\''s'\'': // string + str = va_arg (args_copy, const char *); + break; + case '\''d'\'': // int + number = va_arg (args_copy, int); + break; + case '\''f'\'': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); + + return *str && number && fnumber; +} +' + +# Test code for whether the C compiler supports C99 (body of main). +ac_c_conftest_c99_main=' + // Check bool. + _Bool success = false; + success |= (argc != 0); + + // Check restrict. + if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. 
+ struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + + // Check named initializers. + struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[0] = argv[0][0]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\'' + || dynamic_array[ni.number - 1] != 543); +' + +# Test code for whether the C compiler supports C11 (global declarations) +ac_c_conftest_c11_globals=' +// Does the compiler advertise C11 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" +#endif + +// Check _Alignas. +char _Alignas (double) aligned_as_double; +char _Alignas (0) no_special_alignment; +extern char aligned_as_int; +char _Alignas (0) _Alignas (int) aligned_as_int; + +// Check _Alignof. +enum +{ + int_alignment = _Alignof (int), + int_array_alignment = _Alignof (int[100]), + char_alignment = _Alignof (char) +}; +_Static_assert (0 < -_Alignof (int), "_Alignof is signed"); + +// Check _Noreturn. +int _Noreturn does_not_return (void) { for (;;) continue; } + +// Check _Static_assert. +struct test_static_assert +{ + int x; + _Static_assert (sizeof (int) <= sizeof (long int), + "_Static_assert does not work in struct"); + long int y; +}; + +// Check UTF-8 literals. +#define u8 syntax error! +char const utf8_literal[] = u8"happens to be ASCII" "another string"; + +// Check duplicate typedefs. +typedef long *long_ptr; +typedef long int *long_ptr; +typedef long_ptr long_ptr; + +// Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1. 
+struct anonymous +{ + union { + struct { int i; int j; }; + struct { int k; long int l; } w; + }; + int m; +} v1; +' + +# Test code for whether the C compiler supports C11 (body of main). +ac_c_conftest_c11_main=' + _Static_assert ((offsetof (struct anonymous, i) + == offsetof (struct anonymous, w.k)), + "Anonymous union alignment botch"); + v1.i = 2; + v1.w.k = 5; + ok |= v1.i != 5; +' + +# Test code for whether the C compiler supports C11 (complete). +ac_c_conftest_c11_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} +${ac_c_conftest_c11_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + ${ac_c_conftest_c11_main} + return ok; +} +" + +# Test code for whether the C compiler supports C99 (complete). +ac_c_conftest_c99_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + return ok; +} +" + +# Test code for whether the C compiler supports C89 (complete). +ac_c_conftest_c89_program="${ac_c_conftest_c89_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + return ok; +} +" + + +# Auxiliary files required by this configure script. +ac_aux_files="config.guess config.sub" + +# Locations in which to look for auxiliary files. +ac_aux_dir_candidates="${srcdir}/build/autoconf" + +# Search for a directory containing all of the required auxiliary files, +# $ac_aux_files, from the $PATH-style list $ac_aux_dir_candidates. +# If we don't find one directory that contains all the files we need, +# we report the set of missing files from the *first* directory in +# $ac_aux_dir_candidates and give up. 
+ac_missing_aux_files="" +ac_first_candidate=: +printf "%s\n" "$as_me:${as_lineno-$LINENO}: looking for aux files: $ac_aux_files" >&5 +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in $ac_aux_dir_candidates +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: + + printf "%s\n" "$as_me:${as_lineno-$LINENO}: trying $as_dir" >&5 + ac_aux_dir_found=yes + ac_install_sh= + for ac_aux in $ac_aux_files + do + # As a special case, if "install-sh" is required, that requirement + # can be satisfied by any of "install-sh", "install.sh", or "shtool", + # and $ac_install_sh is set appropriately for whichever one is found. + if test x"$ac_aux" = x"install-sh" + then + if test -f "${as_dir}install-sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install-sh found" >&5 + ac_install_sh="${as_dir}install-sh -c" + elif test -f "${as_dir}install.sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install.sh found" >&5 + ac_install_sh="${as_dir}install.sh -c" + elif test -f "${as_dir}shtool"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}shtool found" >&5 + ac_install_sh="${as_dir}shtool install -c" + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} install-sh" + else + break + fi + fi + else + if test -f "${as_dir}${ac_aux}"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}${ac_aux} found" >&5 + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} ${ac_aux}" + else + break + fi + fi + fi + done + if test "$ac_aux_dir_found" = yes; then + ac_aux_dir="$as_dir" + break + fi + ac_first_candidate=false + + as_found=false +done +IFS=$as_save_IFS +if $as_found +then : + +else $as_nop + as_fn_error $? 
"cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 +fi + + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +if test -f "${ac_aux_dir}config.guess"; then + ac_config_guess="$SHELL ${ac_aux_dir}config.guess" +fi +if test -f "${ac_aux_dir}config.sub"; then + ac_config_sub="$SHELL ${ac_aux_dir}config.sub" +fi +if test -f "$ac_aux_dir/configure"; then + ac_configure="$SHELL ${ac_aux_dir}configure" +fi + # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false @@ -1838,12 +2290,12 @@ for ac_var in $ac_precious_vars; do eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) @@ -1852,24 +2304,24 @@ $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed 
since the previous run:" >&5 -$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else - { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 -$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi - { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 -$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 -$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: \`$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. 
if test "$ac_new_set" = set; then case $ac_new_val in - *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in @@ -1879,11 +2331,12 @@ $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi done if $ac_cache_corrupted; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 -$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`${MAKE-make} distclean' and/or \`rm $cache_file' + and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. ## @@ -1897,178 +2350,34 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -ac_config_headers="$ac_config_headers src/config.h" - -ac_aux_dir= -for ac_dir in build/autoconf "$srcdir"/build/autoconf; do - if test -f "$ac_dir/install-sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f "$ac_dir/install.sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - elif test -f "$ac_dir/shtool"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/shtool install -c" - break - fi -done -if test -z "$ac_aux_dir"; then - as_fn_error $? 
"cannot find install-sh, install.sh, or shtool in build/autoconf \"$srcdir\"/build/autoconf" "$LINENO" 5 -fi -# These three variables are undocumented and unsupported, -# and are intended to be withdrawn in a future Autoconf release. -# They can cause serious problems if a builder's source tree is in a directory -# whose full name contains unusual characters. -ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. -ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. -ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. +ac_config_headers="$ac_config_headers src/config.h" ac_config_files="$ac_config_files src/Makevars" -# generated automatically by aclocal 1.15.1 -*- Autoconf -*- - -# Copyright (C) 1996-2017 Free Software Foundation, Inc. - -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_pthread.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) -# -# DESCRIPTION -# -# This macro figures out how to build C programs using POSIX threads. It -# sets the PTHREAD_LIBS output variable to the threads library and linker -# flags, and the PTHREAD_CFLAGS output variable to any special C compiler -# flags that are needed. (The user can also force certain compiler -# flags/libs to be tested by setting these environment variables.) 
-# -# Also sets PTHREAD_CC to any special C compiler that is needed for -# multi-threaded programs (defaults to the value of CC otherwise). (This -# is necessary on AIX to use the special cc_r compiler alias.) -# -# NOTE: You are assumed to not only compile your program with these flags, -# but also to link with them as well. For example, you might link with -# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS -# -# If you are only building threaded programs, you may wish to use these -# variables in your default LIBS, CFLAGS, and CC: -# -# LIBS="$PTHREAD_LIBS $LIBS" -# CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -# CC="$PTHREAD_CC" -# -# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant -# has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to -# that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). -# -# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the -# PTHREAD_PRIO_INHERIT symbol is defined when compiling with -# PTHREAD_CFLAGS. -# -# ACTION-IF-FOUND is a list of shell commands to run if a threads library -# is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it -# is not found. If ACTION-IF-FOUND is not specified, the default action -# will define HAVE_PTHREAD. -# -# Please let the authors know if this macro fails on any platform, or if -# you have any other suggestions or comments. This macro was based on work -# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help -# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by -# Alejandro Forero Cuervo to the autoconf macro repository. We are also -# grateful for the helpful feedback of numerous users. -# -# Updated for Autoconf 2.68 by Daniel Richard G. -# -# LICENSE -# -# Copyright (c) 2008 Steven G. Johnson -# Copyright (c) 2011 Daniel Richard G. 
-# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -#serial 24 - -# This is what autoupdate's m4 run will expand. It fires -# the warning (with _au_warn_XXX), outputs it into the -# updated configure.ac (with AC_DIAGNOSE), and then outputs -# the replacement expansion. - - -# This is an auxiliary macro that is also run when -# autoupdate runs m4. It simply calls m4_warning, but -# we need a wrapper so that each warning is emitted only -# once. We break the quoting in m4_warning's argument in -# order to expand this macro's arguments, not AU_DEFUN's. 
- - -# Finally, this is the expansion that is picked up by -# autoconf. It tells the user to run autoupdate, and -# then outputs the replacement expansion. We do not care -# about autoupdate's warning because that contains -# information on what to do *after* running autoupdate. - - # Check whether --enable-threads was given. -if test "${enable_threads+set}" = set; then : +if test ${enable_threads+y} +then : enableval=$enable_threads; do_threads=$enableval -else +else $as_nop do_threads=yes fi # Check whether --enable-huge was given. -if test "${enable_huge+set}" = set; then : +if test ${enable_huge+y} +then : enableval=$enable_huge; -$as_echo "#define HUGE_INT 1" >>confdefs.h +printf "%s\n" "#define HUGE_INT 1" >>confdefs.h fi # Check whether --enable-pedantry was given. -if test "${enable_pedantry+set}" = set; then : +if test ${enable_pedantry+y} +then : enableval=$enable_pedantry; DFLAGS='-Wall -Wextra -Wpedantic' fi @@ -2080,7 +2389,16 @@ CFLAGS=`${R_HOME}/bin/R CMD config CFLAGS` LDFLAGS=`${R_HOME}/bin/R CMD config LDFLAGS` # Checks for programs. -ac_ext=c + + + + + + + + + +ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' @@ -2088,11 +2406,12 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. 
else @@ -2100,11 +2419,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2115,11 +2438,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2128,11 +2451,12 @@ if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else @@ -2140,11 +2464,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2155,11 +2483,11 @@ fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi if test "x$ac_ct_CC" = x; then @@ -2167,8 +2495,8 @@ fi else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC @@ -2181,11 +2509,12 @@ if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -2193,11 +2522,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2208,11 +2541,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2221,11 +2554,12 @@ fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -2234,15 +2568,19 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2258,18 +2596,18 @@ if test $ac_prog_rejected = yes; then # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift - ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2280,11 +2618,12 @@ if test -z "$CC"; then do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. 
else @@ -2292,11 +2631,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2307,11 +2650,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2324,11 +2667,12 @@ if test -z "$CC"; then do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else @@ -2336,11 +2680,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2351,11 +2699,11 @@ fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2367,8 +2715,8 @@ done else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC @@ -2376,25 +2724,129 @@ esac fi fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. +set dummy ${ac_tool_prefix}clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi -test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "clang", so it can be a program name with args. +set dummy clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +fi + + +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. 
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 -for ac_option in --version -v -V -qversion; do +for ac_option in --version -v -V -qversion -version; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -2404,7 +2856,7 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done @@ -2412,7 +2864,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; @@ -2424,9 +2876,9 @@ ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -$as_echo_n "checking whether the C compiler works... " >&6; } -ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +printf %s "checking whether the C compiler works... 
" >&6; } +ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" @@ -2447,11 +2899,12 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, @@ -2468,7 +2921,7 @@ do # certainly right. break;; *.* ) - if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi @@ -2484,44 +2937,46 @@ do done test "$ac_cv_exeext" = no && ac_cv_exeext= -else +else $as_nop ac_file='' fi -if test -z "$ac_file"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -$as_echo "$as_me: failed program was:" >&5 +if test -z "$ac_file" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; 
} -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -$as_echo_n "checking for C compiler default output file name... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -$as_echo "$ac_file" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +printf %s "checking for C compiler default output file name... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +printf "%s\n" "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 -$as_echo_n "checking for suffix of executables... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +printf %s "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. 
For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with @@ -2535,15 +2990,15 @@ for ac_file in conftest.exe conftest conftest.*; do * ) break;; esac done -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 -$as_echo "$ac_cv_exeext" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +printf "%s\n" "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext @@ -2552,7 +3007,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int -main () +main (void) { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; @@ -2564,8 +3019,8 @@ _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -$as_echo_n "checking whether we are cross compiling... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +printf %s "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in @@ -2573,10 +3028,10 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in @@ -2584,39 +3039,40 @@ $as_echo "$ac_try_echo"; } >&5 *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot run C compiled programs. + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -$as_echo "$cross_compiling" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +printf "%s\n" "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 -$as_echo_n "checking for suffix of object files... " >&6; } -if ${ac_cv_objext+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +printf %s "checking for suffix of object files... " >&6; } +if test ${ac_cv_objext+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { ; @@ -2630,11 +3086,12 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in @@ -2643,31 +3100,32 @@ $as_echo "$ac_try_echo"; } >&5 break;; esac done -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 -$as_echo "$ac_cv_objext" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +printf "%s\n" "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 -$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } -if ${ac_cv_c_compiler_gnu+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 +printf %s "checking whether the compiler supports GNU C... 
" >&6; } +if test ${ac_cv_c_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { #ifndef __GNUC__ choke me @@ -2677,29 +3135,33 @@ main () return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ac_compiler_gnu=yes -else +else $as_nop ac_compiler_gnu=no fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 -$as_echo "$ac_cv_c_compiler_gnu" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_c_compiler_gnu + if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi -ac_test_CFLAGS=${CFLAGS+set} +ac_test_CFLAGS=${CFLAGS+y} ac_save_CFLAGS=$CFLAGS -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 -$as_echo_n "checking whether $CC accepts -g... " >&6; } -if ${ac_cv_prog_cc_g+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +printf %s "checking whether $CC accepts -g... " >&6; } +if test ${ac_cv_prog_cc_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no @@ -2708,57 +3170,60 @@ else /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ac_cv_prog_cc_g=yes -else +else $as_nop CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : -else +else $as_nop ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ac_cv_prog_cc_g=yes fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 -$as_echo "$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +printf "%s\n" "$ac_cv_prog_cc_g" >&6; } +if test $ac_test_CFLAGS; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then @@ -2773,94 +3238,144 @@ else CFLAGS= fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 -$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } -if ${ac_cv_prog_cc_c89+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_cv_prog_cc_c89=no +ac_prog_cc_stdc=no +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 +printf %s "checking for $CC option to enable C11 features... " >&6; } +if test ${ac_cv_prog_cc_c11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c11=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ -#include -#include -struct stat; -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 1 : -1]; +$ac_c_conftest_c11_program +_ACEOF +for ac_arg in '' -std=gnu11 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c11" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; +if test "x$ac_cv_prog_cc_c11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } + CC="$CC $ac_cv_prog_cc_c11" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 + ac_prog_cc_stdc=c11 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 +printf %s "checking for $CC option to enable C99 features... " >&6; } +if test ${ac_cv_prog_cc_c99+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_c_conftest_c99_program +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} +if test "x$ac_cv_prog_cc_c99" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c99" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else 
$as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } + CC="$CC $ac_cv_prog_cc_c99" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 + ac_prog_cc_stdc=c99 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 +printf %s "checking for $CC option to enable C89 features... " >&6; } +if test ${ac_cv_prog_cc_c89+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_c_conftest_c89_program _ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO"; then : + if ac_fn_c_try_compile "$LINENO" +then : ac_cv_prog_cc_c89=$ac_arg fi -rm -f core conftest.err conftest.$ac_objext +rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC - fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 -$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; -esac -if test "x$ac_cv_prog_cc_c89" != xno; then : +if test "x$ac_cv_prog_cc_c89" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c89" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none 
needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } + CC="$CC $ac_cv_prog_cc_c89" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 + ac_prog_cc_stdc=c89 +fi fi ac_ext=c @@ -2873,26 +3388,30 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu # Checks for libraries. if test "$do_threads" = "yes"; then - # Make sure we can run config.sub. -$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || - as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 -$as_echo_n "checking build system type... " >&6; } -if ${ac_cv_build+:} false; then : - $as_echo_n "(cached) " >&6 -else + + + # Make sure we can run config.sub. +$SHELL "${ac_aux_dir}config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL ${ac_aux_dir}config.sub" "$LINENO" 5 + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +printf %s "checking build system type... " >&6; } +if test ${ac_cv_build+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_build_alias=$build_alias test "x$ac_build_alias" = x && - ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` + ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"` test "x$ac_build_alias" = x && as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 -ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || - as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 +ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" "$LINENO" 5 fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 -$as_echo "$ac_cv_build" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +printf "%s\n" "$ac_cv_build" >&6; } case $ac_cv_build in *-*-*) ;; *) as_fn_error $? 
"invalid value of canonical build" "$LINENO" 5;; @@ -2911,21 +3430,22 @@ IFS=$ac_save_IFS case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 -$as_echo_n "checking host system type... " >&6; } -if ${ac_cv_host+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +printf %s "checking host system type... " >&6; } +if test ${ac_cv_host+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test "x$host_alias" = x; then ac_cv_host=$ac_cv_build else - ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || - as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 + ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" "$LINENO" 5 fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 -$as_echo "$ac_cv_host" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +printf "%s\n" "$ac_cv_host" >&6; } case $ac_cv_host in *-*-*) ;; *) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; @@ -2944,11 +3464,12 @@ IFS=$ac_save_IFS case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 -$as_echo_n "checking for a sed that does not truncate output... " >&6; } -if ${ac_cv_path_SED+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +printf %s "checking for a sed that does not truncate output... 
" >&6; } +if test ${ac_cv_path_SED+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ for ac_i in 1 2 3 4 5 6 7; do ac_script="$ac_script$as_nl$ac_script" @@ -2962,10 +3483,15 @@ else for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in sed gsed; do + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in sed gsed + do for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_SED="$as_dir/$ac_prog$ac_exec_ext" + ac_path_SED="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_SED" || continue # Check for GNU ac_path_SED and select it if it is found. # Check for GNU $ac_path_SED @@ -2974,13 +3500,13 @@ case `"$ac_path_SED" --version 2>&1` in ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; *) ac_count=0 - $as_echo_n 0123456789 >"conftest.in" + printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" - $as_echo '' >> "conftest.nl" + printf "%s\n" '' >> "conftest.nl" "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val @@ -3008,8 +3534,8 @@ else fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 -$as_echo "$ac_cv_path_SED" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +printf "%s\n" "$ac_cv_path_SED" >&6; } SED="$ac_cv_path_SED" rm -f conftest.sed @@ -3019,40 +3545,36 @@ ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 -$as_echo_n "checking how to run the C 
preprocessor... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +printf %s "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then - if ${ac_cv_prog_CPP+:} false; then : - $as_echo_n "(cached) " >&6 -else - # Double quotes because CPP needs to be expanded - for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + if test ${ac_cv_prog_CPP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + # Double quotes because $CC needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" cpp /lib/cpp do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif +#include Syntax error _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : -else +else $as_nop # Broken: fails on valid input. continue fi @@ -3064,10 +3586,11 @@ rm -f conftest.err conftest.i conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : # Broken: success on invalid input. continue -else +else $as_nop # Passes both tests. ac_preproc_ok=: break @@ -3077,7 +3600,8 @@ rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : +if $ac_preproc_ok +then : break fi @@ -3089,29 +3613,24 @@ fi else ac_cv_prog_CPP=$CPP fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 -$as_echo "$CPP" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +printf "%s\n" "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif +#include Syntax error _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : -else +else $as_nop # Broken: fails on valid input. continue fi @@ -3123,10 +3642,11 @@ rm -f conftest.err conftest.i conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : # Broken: success on invalid input. continue -else +else $as_nop # Passes both tests. ac_preproc_ok=: break @@ -3136,11 +3656,12 @@ rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : +if $ac_preproc_ok +then : -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? 
"C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi @@ -3152,11 +3673,12 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 -$as_echo_n "checking for grep that handles long lines and -e... " >&6; } -if ${ac_cv_path_GREP+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +printf %s "checking for grep that handles long lines and -e... " >&6; } +if test ${ac_cv_path_GREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST @@ -3164,10 +3686,15 @@ else for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in grep ggrep; do + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in grep ggrep + do for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + ac_path_GREP="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_GREP" || continue # Check for GNU ac_path_GREP and select it if it is found. 
# Check for GNU $ac_path_GREP @@ -3176,13 +3703,13 @@ case `"$ac_path_GREP" --version 2>&1` in ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 - $as_echo_n 0123456789 >"conftest.in" + printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" - $as_echo 'GREP' >> "conftest.nl" + printf "%s\n" 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val @@ -3210,16 +3737,17 @@ else fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 -$as_echo "$ac_cv_path_GREP" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +printf "%s\n" "$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 -$as_echo_n "checking for egrep... " >&6; } -if ${ac_cv_path_EGREP+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +printf %s "checking for egrep... " >&6; } +if test ${ac_cv_path_EGREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else @@ -3230,10 +3758,15 @@ else for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in egrep; do + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in egrep + do for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_EGREP" || continue # Check for GNU ac_path_EGREP and select it if it is found. 
# Check for GNU $ac_path_EGREP @@ -3242,13 +3775,13 @@ case `"$ac_path_EGREP" --version 2>&1` in ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 - $as_echo_n 0123456789 >"conftest.in" + printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" - $as_echo 'EGREP' >> "conftest.nl" + printf "%s\n" 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val @@ -3277,8 +3810,8 @@ fi fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 -$as_echo "$ac_cv_path_EGREP" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +printf "%s\n" "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" @@ -3305,38 +3838,41 @@ if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then ax_pthread_save_CC="$CC" ax_pthread_save_CFLAGS="$CFLAGS" ax_pthread_save_LIBS="$LIBS" - if test "x$PTHREAD_CC" != "x"; then : + if test "x$PTHREAD_CC" != "x" +then : CC="$PTHREAD_CC" +fi + if test "x$PTHREAD_CXX" != "x" +then : + CXX="$PTHREAD_CXX" fi CFLAGS="$CFLAGS $PTHREAD_CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS" >&5 -$as_echo_n "checking for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS" >&5 +printf %s "checking for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char pthread_join (); int -main () +main (void) { return pthread_join (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ax_pthread_ok=yes fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 -$as_echo "$ax_pthread_ok" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 +printf "%s\n" "$ax_pthread_ok" >&6; } if test "x$ax_pthread_ok" = "xno"; then PTHREAD_LIBS="" PTHREAD_CFLAGS="" @@ -3351,10 +3887,12 @@ fi # (e.g. DEC) have both -lpthread and -lpthreads, where one of the # libraries is broken (non-POSIX). -# Create a list of thread flags to try. Items starting with a "-" are -# C compiler flags, and other items are library names, except for "none" -# which indicates that we try without any flags at all, and "pthread-config" -# which is a program returning the flags for the Pth emulation library. +# Create a list of thread flags to try. Items with a "," contain both +# C compiler flags (before ",") and linker flags (after ","). Other items +# starting with a "-" are C compiler flags, and remaining items are +# library names, except for "none" which indicates that we try without +# any flags at all, and "pthread-config" which is a program returning +# the flags for the Pth emulation library. ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" @@ -3412,11 +3950,12 @@ case $host_os in _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "AX_PTHREAD_ZOS_MISSING" >/dev/null 2>&1; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support." 
>&5 -$as_echo "$as_me: WARNING: IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support." >&2;} + $EGREP "AX_PTHREAD_ZOS_MISSING" >/dev/null 2>&1 +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support." >&5 +printf "%s\n" "$as_me: WARNING: IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support." >&2;} fi -rm -f conftest* +rm -rf conftest* ;; @@ -3430,46 +3969,18 @@ rm -f conftest* # that too in a future libc.) So we'll check first for the # standard Solaris way of linking pthreads (-mt -lpthread). - ax_pthread_flags="-mt,pthread pthread $ax_pthread_flags" - ;; -esac - -# GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC) - -if test "x$GCC" = "xyes"; then : - ax_pthread_flags="-pthread -pthreads $ax_pthread_flags" -fi - -# The presence of a feature test macro requesting re-entrant function -# definitions is, on some systems, a strong hint that pthreads support is -# correctly enabled - -case $host_os in - darwin* | hpux* | linux* | osf* | solaris*) - ax_pthread_check_macro="_REENTRANT" - ;; - - aix*) - ax_pthread_check_macro="_THREAD_SAFE" - ;; - - *) - ax_pthread_check_macro="--" + ax_pthread_flags="-mt,-lpthread pthread $ax_pthread_flags" ;; esac -if test "x$ax_pthread_check_macro" = "x--"; then : - ax_pthread_check_cond=0 -else - ax_pthread_check_cond="!defined($ax_pthread_check_macro)" -fi # Are we compiling with Clang? -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC is Clang" >&5 -$as_echo_n "checking whether $CC is Clang... " >&6; } -if ${ax_cv_PTHREAD_CLANG+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC is Clang" >&5 +printf %s "checking whether $CC is Clang... 
" >&6; } +if test ${ax_cv_PTHREAD_CLANG+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_cv_PTHREAD_CLANG=no # Note that Autoconf sets GCC=yes for Clang as well as GCC if test "x$GCC" = "xyes"; then @@ -3482,150 +3993,99 @@ else _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "AX_PTHREAD_CC_IS_CLANG" >/dev/null 2>&1; then : + $EGREP "AX_PTHREAD_CC_IS_CLANG" >/dev/null 2>&1 +then : ax_cv_PTHREAD_CLANG=yes fi -rm -f conftest* +rm -rf conftest* fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_CLANG" >&5 -$as_echo "$ax_cv_PTHREAD_CLANG" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_CLANG" >&5 +printf "%s\n" "$ax_cv_PTHREAD_CLANG" >&6; } ax_pthread_clang="$ax_cv_PTHREAD_CLANG" -ax_pthread_clang_warning=no -# Clang needs special handling, because older versions handle the -pthread -# option in a rather... idiosyncratic way +# GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC) -if test "x$ax_pthread_clang" = "xyes"; then +# Note that for GCC and Clang -pthread generally implies -lpthread, +# except when -nostdlib is passed. +# This is problematic using libtool to build C++ shared libraries with pthread: +# [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25460 +# [2] https://bugzilla.redhat.com/show_bug.cgi?id=661333 +# [3] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=468555 +# To solve this, first try -pthread together with -lpthread for GCC - # Clang takes -pthread; it has never supported any other flag - - # (Note 1: This will need to be revisited if a system that Clang - # supports has POSIX threads in a separate library. This tends not - # to be the way of modern systems, but it's conceivable.) +if test "x$GCC" = "xyes" +then : + ax_pthread_flags="-pthread,-lpthread -pthread -pthreads $ax_pthread_flags" +fi - # (Note 2: On some systems, notably Darwin, -pthread is not needed - # to get POSIX threads support; the API is always present and - # active. 
We could reasonably leave PTHREAD_CFLAGS empty. But - # -pthread does define _REENTRANT, and while the Darwin headers - # ignore this macro, third-party headers might not.) +# Clang takes -pthread (never supported any other flag), but we'll try with -lpthread first - PTHREAD_CFLAGS="-pthread" - PTHREAD_LIBS= +if test "x$ax_pthread_clang" = "xyes" +then : + ax_pthread_flags="-pthread,-lpthread -pthread" +fi - ax_pthread_ok=yes - # However, older versions of Clang make a point of warning the user - # that, in an invocation where only linking and no compilation is - # taking place, the -pthread option has no effect ("argument unused - # during compilation"). They expect -pthread to be passed in only - # when source code is being compiled. - # - # Problem is, this is at odds with the way Automake and most other - # C build frameworks function, which is that the same flags used in - # compilation (CFLAGS) are also used in linking. Many systems - # supported by AX_PTHREAD require exactly this for POSIX threads - # support, and in fact it is often not straightforward to specify a - # flag that is used only in the compilation phase and not in - # linking. Such a scenario is extremely rare in practice. - # - # Even though use of the -pthread flag in linking would only print - # a warning, this can be a nuisance for well-run software projects - # that build with -Werror. So if the active version of Clang has - # this misfeature, we search for an option to squash it. +# The presence of a feature test macro requesting re-entrant function +# definitions is, on some systems, a strong hint that pthreads support is +# correctly enabled - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Clang needs flag to prevent \"argument unused\" warning when linking with -pthread" >&5 -$as_echo_n "checking whether Clang needs flag to prevent \"argument unused\" warning when linking with -pthread... 
" >&6; } -if ${ax_cv_PTHREAD_CLANG_NO_WARN_FLAG+:} false; then : - $as_echo_n "(cached) " >&6 -else - ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown - # Create an alternate version of $ac_link that compiles and - # links in two steps (.c -> .o, .o -> exe) instead of one - # (.c -> exe), because the warning occurs only in the second - # step - ax_pthread_save_ac_link="$ac_link" - ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g' - ax_pthread_link_step=`$as_echo "$ac_link" | sed "$ax_pthread_sed"` - ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)" - ax_pthread_save_CFLAGS="$CFLAGS" - for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do - if test "x$ax_pthread_try" = "xunknown"; then : - break -fi - CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS" - ac_link="$ax_pthread_save_ac_link" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -int main(void){return 0;} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_link="$ax_pthread_2step_ac_link" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -int main(void){return 0;} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - break -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext +case $host_os in + darwin* | hpux* | linux* | osf* | solaris*) + ax_pthread_check_macro="_REENTRANT" + ;; -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - done - ac_link="$ax_pthread_save_ac_link" - CFLAGS="$ax_pthread_save_CFLAGS" - if test "x$ax_pthread_try" = "x"; then : - ax_pthread_try=no -fi - ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try" + aix*) + ax_pthread_check_macro="_THREAD_SAFE" + ;; + *) + ax_pthread_check_macro="--" + ;; +esac +if test "x$ax_pthread_check_macro" = "x--" +then : + ax_pthread_check_cond=0 +else $as_nop + ax_pthread_check_cond="!defined($ax_pthread_check_macro)" fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" >&5 -$as_echo "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" >&6; } - - case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in - no | unknown) ;; - *) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;; - esac -fi # $ax_pthread_clang = yes if test "x$ax_pthread_ok" = "xno"; then for ax_pthread_try_flag in $ax_pthread_flags; do case $ax_pthread_try_flag in none) - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads work without any flags" >&5 -$as_echo_n "checking whether pthreads work without any flags... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether pthreads work without any flags" >&5 +printf %s "checking whether pthreads work without any flags... " >&6; } ;; - -mt,pthread) - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with -mt -lpthread" >&5 -$as_echo_n "checking whether pthreads work with -mt -lpthread... 
" >&6; } - PTHREAD_CFLAGS="-mt" - PTHREAD_LIBS="-lpthread" + *,*) + PTHREAD_CFLAGS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\1/"` + PTHREAD_LIBS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\2/"` + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with \"$PTHREAD_CFLAGS\" and \"$PTHREAD_LIBS\"" >&5 +printf %s "checking whether pthreads work with \"$PTHREAD_CFLAGS\" and \"$PTHREAD_LIBS\"... " >&6; } ;; -*) - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with $ax_pthread_try_flag" >&5 -$as_echo_n "checking whether pthreads work with $ax_pthread_try_flag... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with $ax_pthread_try_flag" >&5 +printf %s "checking whether pthreads work with $ax_pthread_try_flag... " >&6; } PTHREAD_CFLAGS="$ax_pthread_try_flag" ;; pthread-config) # Extract the first word of "pthread-config", so it can be a program name with args. set dummy pthread-config; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ax_pthread_config+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ax_pthread_config+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ax_pthread_config"; then ac_cv_prog_ax_pthread_config="$ax_pthread_config" # Let the user override the test. else @@ -3633,11 +4093,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ax_pthread_config="yes" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -3649,15 +4113,16 @@ fi fi ax_pthread_config=$ac_cv_prog_ax_pthread_config if test -n "$ax_pthread_config"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_config" >&5 -$as_echo "$ax_pthread_config" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_config" >&5 +printf "%s\n" "$ax_pthread_config" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi - if test "x$ax_pthread_config" = "xno"; then : + if test "x$ax_pthread_config" = "xno" +then : continue fi PTHREAD_CFLAGS="`pthread-config --cflags`" @@ -3665,8 +4130,8 @@ fi ;; *) - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for the pthreads library -l$ax_pthread_try_flag" >&5 -$as_echo_n "checking for the pthreads library -l$ax_pthread_try_flag... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for the pthreads library -l$ax_pthread_try_flag" >&5 +printf %s "checking for the pthreads library -l$ax_pthread_try_flag... " >&6; } PTHREAD_LIBS="-l$ax_pthread_try_flag" ;; esac @@ -3692,10 +4157,16 @@ $as_echo_n "checking for the pthreads library -l$ax_pthread_try_flag... " >&6; } # if $ax_pthread_check_cond # error "$ax_pthread_check_macro must be defined" # endif - static void routine(void *a) { a = 0; } + static void *some_global = NULL; + static void routine(void *a) + { + /* To avoid any unused-parameter or + unused-but-set-parameter warning. 
*/ + some_global = a; + } static void *start_routine(void *a) { return a; } int -main () +main (void) { pthread_t th; pthread_attr_t attr; pthread_create(&th, 0, start_routine, 0); @@ -3707,18 +4178,20 @@ pthread_t th; pthread_attr_t attr; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ax_pthread_ok=yes fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CFLAGS="$ax_pthread_save_CFLAGS" LIBS="$ax_pthread_save_LIBS" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 -$as_echo "$ax_pthread_ok" >&6; } - if test "x$ax_pthread_ok" = "xyes"; then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 +printf "%s\n" "$ax_pthread_ok" >&6; } + if test "x$ax_pthread_ok" = "xyes" +then : break fi @@ -3727,6 +4200,109 @@ fi done fi + +# Clang needs special handling, because older versions handle the -pthread +# option in a rather... idiosyncratic way + +if test "x$ax_pthread_clang" = "xyes"; then + + # Clang takes -pthread; it has never supported any other flag + + # (Note 1: This will need to be revisited if a system that Clang + # supports has POSIX threads in a separate library. This tends not + # to be the way of modern systems, but it's conceivable.) + + # (Note 2: On some systems, notably Darwin, -pthread is not needed + # to get POSIX threads support; the API is always present and + # active. We could reasonably leave PTHREAD_CFLAGS empty. But + # -pthread does define _REENTRANT, and while the Darwin headers + # ignore this macro, third-party headers might not.) + + # However, older versions of Clang make a point of warning the user + # that, in an invocation where only linking and no compilation is + # taking place, the -pthread option has no effect ("argument unused + # during compilation"). They expect -pthread to be passed in only + # when source code is being compiled. 
+ # + # Problem is, this is at odds with the way Automake and most other + # C build frameworks function, which is that the same flags used in + # compilation (CFLAGS) are also used in linking. Many systems + # supported by AX_PTHREAD require exactly this for POSIX threads + # support, and in fact it is often not straightforward to specify a + # flag that is used only in the compilation phase and not in + # linking. Such a scenario is extremely rare in practice. + # + # Even though use of the -pthread flag in linking would only print + # a warning, this can be a nuisance for well-run software projects + # that build with -Werror. So if the active version of Clang has + # this misfeature, we search for an option to squash it. + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Clang needs flag to prevent \"argument unused\" warning when linking with -pthread" >&5 +printf %s "checking whether Clang needs flag to prevent \"argument unused\" warning when linking with -pthread... " >&6; } +if test ${ax_cv_PTHREAD_CLANG_NO_WARN_FLAG+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown + # Create an alternate version of $ac_link that compiles and + # links in two steps (.c -> .o, .o -> exe) instead of one + # (.c -> exe), because the warning occurs only in the second + # step + ax_pthread_save_ac_link="$ac_link" + ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g' + ax_pthread_link_step=`printf "%s\n" "$ac_link" | sed "$ax_pthread_sed"` + ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)" + ax_pthread_save_CFLAGS="$CFLAGS" + for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do + if test "x$ax_pthread_try" = "xunknown" +then : + break +fi + CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS" + ac_link="$ax_pthread_save_ac_link" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int main(void){return 0;} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_link="$ax_pthread_2step_ac_link" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main(void){return 0;} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + done + ac_link="$ax_pthread_save_ac_link" + CFLAGS="$ax_pthread_save_CFLAGS" + if test "x$ax_pthread_try" = "x" +then : + ax_pthread_try=no +fi + ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try" + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" >&5 +printf "%s\n" "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" >&6; } + + case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in + no | unknown) ;; + *) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;; + esac + +fi # $ax_pthread_clang = yes + + + # Various other checks: if test "x$ax_pthread_ok" = "xyes"; then ax_pthread_save_CFLAGS="$CFLAGS" @@ -3735,51 +4311,53 @@ if test "x$ax_pthread_ok" = "xyes"; then LIBS="$PTHREAD_LIBS $LIBS" # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for joinable pthread attribute" >&5 -$as_echo_n "checking for joinable pthread attribute... " >&6; } -if ${ax_cv_PTHREAD_JOINABLE_ATTR+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for joinable pthread attribute" >&5 +printf %s "checking for joinable pthread attribute... " >&6; } +if test ${ax_cv_PTHREAD_JOINABLE_ATTR+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_cv_PTHREAD_JOINABLE_ATTR=unknown for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include int -main () +main (void) { int attr = $ax_pthread_attr; return attr /* ; */ ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext done fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_JOINABLE_ATTR" >&5 -$as_echo "$ax_cv_PTHREAD_JOINABLE_ATTR" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_JOINABLE_ATTR" >&5 +printf "%s\n" "$ax_cv_PTHREAD_JOINABLE_ATTR" >&6; } if test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \ test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \ - test "x$ax_pthread_joinable_attr_defined" != "xyes"; then : + test "x$ax_pthread_joinable_attr_defined" != "xyes" +then : -cat >>confdefs.h <<_ACEOF -#define PTHREAD_CREATE_JOINABLE $ax_cv_PTHREAD_JOINABLE_ATTR -_ACEOF +printf "%s\n" "#define PTHREAD_CREATE_JOINABLE $ax_cv_PTHREAD_JOINABLE_ATTR" >>confdefs.h ax_pthread_joinable_attr_defined=yes fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether more special flags are required for pthreads" >&5 -$as_echo_n "checking whether more special flags are required for pthreads... " >&6; } -if ${ax_cv_PTHREAD_SPECIAL_FLAGS+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether more special flags are required for pthreads" >&5 +printf %s "checking whether more special flags are required for pthreads... 
" >&6; } +if test ${ax_cv_PTHREAD_SPECIAL_FLAGS+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_cv_PTHREAD_SPECIAL_FLAGS=no case $host_os in solaris*) @@ -3788,45 +4366,50 @@ else esac fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_SPECIAL_FLAGS" >&5 -$as_echo "$ax_cv_PTHREAD_SPECIAL_FLAGS" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_SPECIAL_FLAGS" >&5 +printf "%s\n" "$ax_cv_PTHREAD_SPECIAL_FLAGS" >&6; } if test "x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \ - test "x$ax_pthread_special_flags_added" != "xyes"; then : + test "x$ax_pthread_special_flags_added" != "xyes" +then : PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS" ax_pthread_special_flags_added=yes fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PTHREAD_PRIO_INHERIT" >&5 -$as_echo_n "checking for PTHREAD_PRIO_INHERIT... " >&6; } -if ${ax_cv_PTHREAD_PRIO_INHERIT+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PTHREAD_PRIO_INHERIT" >&5 +printf %s "checking for PTHREAD_PRIO_INHERIT... " >&6; } +if test ${ax_cv_PTHREAD_PRIO_INHERIT+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include int -main () +main (void) { int i = PTHREAD_PRIO_INHERIT; + return i; ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ax_cv_PTHREAD_PRIO_INHERIT=yes -else +else $as_nop ax_cv_PTHREAD_PRIO_INHERIT=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_PRIO_INHERIT" >&5 -$as_echo "$ax_cv_PTHREAD_PRIO_INHERIT" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_PRIO_INHERIT" >&5 +printf "%s\n" "$ax_cv_PTHREAD_PRIO_INHERIT" >&6; } if test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \ - test "x$ax_pthread_prio_inherit_defined" != "xyes"; then : + test "x$ax_pthread_prio_inherit_defined" != "xyes" +then : -$as_echo "#define HAVE_PTHREAD_PRIO_INHERIT 1" >>confdefs.h +printf "%s\n" "#define HAVE_PTHREAD_PRIO_INHERIT 1" >>confdefs.h ax_pthread_prio_inherit_defined=yes @@ -3844,19 +4427,31 @@ fi #handle absolute path differently from PATH based program lookup case "x$CC" in #( x/*) : - if as_fn_executable_p ${CC}_r; then : + + if as_fn_executable_p ${CC}_r +then : PTHREAD_CC="${CC}_r" -fi ;; #( +fi + if test "x${CXX}" != "x" +then : + if as_fn_executable_p ${CXX}_r +then : + PTHREAD_CXX="${CXX}_r" +fi +fi + ;; #( *) : - for ac_prog in ${CC}_r + + for ac_prog in ${CC}_r do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_PTHREAD_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_PTHREAD_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$PTHREAD_CC"; then ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. else @@ -3864,11 +4459,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_PTHREAD_CC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -3879,19 +4478,73 @@ fi fi PTHREAD_CC=$ac_cv_prog_PTHREAD_CC if test -n "$PTHREAD_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CC" >&5 -$as_echo "$PTHREAD_CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CC" >&5 +printf "%s\n" "$PTHREAD_CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi test -n "$PTHREAD_CC" && break done test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" - ;; -esac ;; #( + + if test "x${CXX}" != "x" +then : + for ac_prog in ${CXX}_r +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_PTHREAD_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$PTHREAD_CXX"; then + ac_cv_prog_PTHREAD_CXX="$PTHREAD_CXX" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_PTHREAD_CXX="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +PTHREAD_CXX=$ac_cv_prog_PTHREAD_CXX +if test -n "$PTHREAD_CXX"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CXX" >&5 +printf "%s\n" "$PTHREAD_CXX" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$PTHREAD_CXX" && break +done +test -n "$PTHREAD_CXX" || PTHREAD_CXX="$CXX" + +fi + + ;; +esac + ;; #( *) : ;; esac @@ -3901,6 +4554,8 @@ esac fi test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" +test -n "$PTHREAD_CXX" || PTHREAD_CXX="$CXX" + @@ -3909,16 +4564,16 @@ test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: if test "x$ax_pthread_ok" = "xyes"; then -$as_echo "#define HAVE_PTHREAD 1" >>confdefs.h +printf "%s\n" "#define HAVE_PTHREAD 1" >>confdefs.h : else ax_pthread_ok=no - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: No threads on this architecture" >&5 -$as_echo "$as_me: WARNING: No threads on this architecture" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: No threads on this architecture" >&5 +printf "%s\n" "$as_me: WARNING: No threads on this architecture" >&2;} -$as_echo "#define NOTHREADS 1" >>confdefs.h +printf "%s\n" "#define NOTHREADS 1" >>confdefs.h do_threads="no" fi @@ -3931,7 +4586,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu else -$as_echo "#define NOTHREADS 1" >>confdefs.h +printf "%s\n" "#define NOTHREADS 1" >>confdefs.h fi @@ -3944,17 +4599,18 @@ fi if test "$do_threads" = "yes"; then # bugger fails on macosx -{ $as_echo 
"$as_me:${as_lineno-$LINENO}: checking for timed semaphore wait" >&5 -$as_echo_n "checking for timed semaphore wait... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for timed semaphore wait" >&5 +printf %s "checking for timed semaphore wait... " >&6; } # we may use these when checking for semaphores LDFLAGS="$LDFLAGS $PTHREAD_LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -if test "$cross_compiling" = yes; then : - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +if test "$cross_compiling" = yes +then : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run test program while cross compiling See \`config.log' for more details" "$LINENO" 5; } -else +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include @@ -3979,15 +4635,16 @@ else } _ACEOF -if ac_fn_c_try_run "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } +if ac_fn_c_try_run "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } -$as_echo "#define HAVE_SEM 1" >>confdefs.h +printf "%s\n" "#define HAVE_SEM 1" >>confdefs.h -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ @@ -3996,11 +4653,12 @@ fi # check for capability of setting thread name -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing pthread_setname_np" >&5 -$as_echo_n "checking for library containing pthread_setname_np... 
" >&6; } -if ${ac_cv_search_pthread_setname_np+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing pthread_setname_np" >&5 +printf %s "checking for library containing pthread_setname_np... " >&6; } +if test ${ac_cv_search_pthread_setname_np+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -4008,49 +4666,51 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif char pthread_setname_np (); int -main () +main (void) { return pthread_setname_np (); ; return 0; } _ACEOF -for ac_lib in '' ; do +for ac_lib in '' +do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $PTHREAD_CFLAGS $ac_func_search_save_LIBS" fi - if ac_fn_c_try_link "$LINENO"; then : + if ac_fn_c_try_link "$LINENO" +then : ac_cv_search_pthread_setname_np=$ac_res fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext - if ${ac_cv_search_pthread_setname_np+:} false; then : + if test ${ac_cv_search_pthread_setname_np+y} +then : break fi done -if ${ac_cv_search_pthread_setname_np+:} false; then : +if test ${ac_cv_search_pthread_setname_np+y} +then : -else +else $as_nop ac_cv_search_pthread_setname_np=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_pthread_setname_np" >&5 -$as_echo "$ac_cv_search_pthread_setname_np" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_pthread_setname_np" >&5 +printf "%s\n" "$ac_cv_search_pthread_setname_np" >&6; } ac_res=$ac_cv_search_pthread_setname_np -if test "$ac_res" != no; then 
: +if test "$ac_res" != no +then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" -$as_echo "#define HAVE_THREADNAME 1" >>confdefs.h +printf "%s\n" "#define HAVE_THREADNAME 1" >>confdefs.h fi @@ -4084,8 +4744,8 @@ _ACEOF case $ac_val in #( *${as_nl}*) case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( @@ -4115,15 +4775,15 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; /^ac_cv_env_/b end t clear :clear - s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 -$as_echo "$as_me: updating cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +printf "%s\n" "$as_me: updating cache $cache_file" >&6;} if test ! 
-f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else @@ -4137,8 +4797,8 @@ $as_echo "$as_me: updating cache $cache_file" >&6;} fi fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 -$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache @@ -4155,7 +4815,7 @@ U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' - ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" @@ -4171,8 +4831,8 @@ LTLIBOBJS=$ac_ltlibobjs ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" -{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 -$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! $SHELL @@ -4195,14 +4855,16 @@ cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. 
alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else +else $as_nop case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( @@ -4212,46 +4874,46 @@ esac fi + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. as_nl=' ' export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. -as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. -if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. 
+LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then +if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || @@ -4260,13 +4922,6 @@ if test "${PATH_SEPARATOR+set}" != set; then fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( @@ -4275,8 +4930,12 @@ case $0 in #(( for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS @@ -4288,30 +4947,10 @@ if test "x$as_myself" = x; then as_myself=$0 fi if test ! 
-f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] @@ -4324,13 +4963,14 @@ as_fn_error () as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $2" >&2 + printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error + # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -4357,18 +4997,20 @@ as_fn_unset () { eval $1=; unset $1;} } as_unset=as_fn_unset + # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. 
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : eval 'as_fn_append () { eval $1+=\$2 }' -else +else $as_nop as_fn_append () { eval $1=\$$1\$2 @@ -4380,12 +5022,13 @@ fi # as_fn_append # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : eval 'as_fn_arith () { as_val=$(( $* )) }' -else +else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` @@ -4416,7 +5059,7 @@ as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X/"$0" | +printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q @@ -4438,6 +5081,10 @@ as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) @@ -4451,6 +5098,12 @@ case `echo -n x` in #((((( ECHO_N='-n';; esac +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. 
+as_echo='printf %s\n' +as_echo_n='printf %s' + rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -4492,7 +5145,7 @@ as_fn_mkdir_p () as_dirs= while :; do case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" @@ -4501,7 +5154,7 @@ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | +printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -4564,7 +5217,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # values after options handling. ac_log=" This file was extended by $as_me, which was -generated by GNU Autoconf 2.69. Invocation command line was +generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -4622,14 +5275,16 @@ $config_headers Report bugs to the package provider." _ACEOF +ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` +ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ config.status -configured by $0, generated by GNU Autoconf 2.69, +configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" -Copyright (C) 2012 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." 
@@ -4666,15 +5321,15 @@ do -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) - $as_echo "$ac_cs_version"; exit ;; + printf "%s\n" "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) - $as_echo "$ac_cs_config"; exit ;; + printf "%s\n" "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? "missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" @@ -4682,7 +5337,7 @@ do --header | --heade | --head | --hea ) $ac_shift case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; esac as_fn_append CONFIG_HEADERS " '$ac_optarg'" ac_need_defaults=false;; @@ -4691,7 +5346,7 @@ do as_fn_error $? "ambiguous option: \`$1' Try \`$0 --help' for more information.";; --help | --hel | -h ) - $as_echo "$ac_cs_usage"; exit ;; + printf "%s\n" "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; @@ -4719,7 +5374,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift - \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" @@ -4733,7 +5388,7 @@ exec 5>>config.log sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. 
## _ASBOX - $as_echo "$ac_log" + printf "%s\n" "$ac_log" } >&5 _ACEOF @@ -4759,8 +5414,8 @@ done # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then - test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files - test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files + test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers fi # Have a temporary directory for convenience. Make it in the build tree @@ -5096,7 +5751,7 @@ do esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac - case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done @@ -5104,17 +5759,17 @@ do # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` - $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" - { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 -$as_echo "$as_me: creating $ac_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +printf "%s\n" "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) - ac_sed_conf_input=`$as_echo "$configure_input" | + ac_sed_conf_input=`printf "%s\n" "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac @@ -5131,7 +5786,7 @@ $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X"$ac_file" | +printf "%s\n" X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -5155,9 +5810,9 @@ $as_echo X"$ac_file" | case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; @@ -5210,8 +5865,8 @@ ac_sed_dataroot=' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 -$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' @@ -5253,9 +5908,9 @@ test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. 
Please make sure it is defined" >&5 -$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" @@ -5271,20 +5926,20 @@ which seems to be undefined. Please make sure it is defined" >&2;} # if test x"$ac_file" != x-; then { - $as_echo "/* $configure_input */" \ + printf "%s\n" "/* $configure_input */" >&1 \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" } >"$ac_tmp/config.h" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then - { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 -$as_echo "$as_me: $ac_file is unchanged" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +printf "%s\n" "$as_me: $ac_file is unchanged" >&6;} else rm -f "$ac_file" mv "$ac_tmp/config.h" "$ac_file" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 fi else - $as_echo "/* $configure_input */" \ + printf "%s\n" "/* $configure_input */" >&1 \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ || as_fn_error $? 
"could not create -" "$LINENO" 5 fi @@ -5325,7 +5980,8 @@ if test "$no_create" != yes; then $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 -$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi + diff --git a/configure.ac b/configure.ac index 12dd618..8df0b41 100644 --- a/configure.ac +++ b/configure.ac @@ -1,12 +1,13 @@ # -*- Autoconf -*- # Process this file with autoconf to produce a configure script. -AC_PREREQ([2.67]) -AC_INIT([lfe]) +AC_PREREQ([2.71]) +AC_INIT +AC_CONFIG_SRCDIR([lfe]) AC_CONFIG_SRCDIR([src/lfe.c]) AC_CONFIG_HEADERS([src/config.h]) AC_CONFIG_AUX_DIR([build/autoconf]) AC_CONFIG_FILES([src/Makevars]) -m4_include([tools/ax_pthread.m4]) +AC_CONFIG_MACRO_DIRS([tools]) dnl AC_LANG(C) AC_ARG_ENABLE([threads], [AS_HELP_STRING([--disable-threads],[do not use threads])], @@ -55,8 +56,7 @@ AC_MSG_CHECKING(for timed semaphore wait) # we may use these when checking for semaphores LDFLAGS="$LDFLAGS $PTHREAD_LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -AC_TRY_RUN(dnl -[#include +AC_RUN_IFELSE([AC_LANG_SOURCE([[#include #include #include #include @@ -76,11 +76,9 @@ AC_TRY_RUN(dnl } exit(0); } -], -AC_MSG_RESULT(yes) -AC_DEFINE([HAVE_SEM], [1], [timed sem wait]), -AC_MSG_RESULT(no) -) +]])],[AC_MSG_RESULT(yes) +AC_DEFINE(HAVE_SEM, 1, timed sem wait)],[AC_MSG_RESULT(no) +],[]) # check for capability of setting thread name AC_SEARCH_LIBS([pthread_setname_np], [], AC_DEFINE([HAVE_THREADNAME],1, [chg thread name]), [], [$PTHREAD_CFLAGS]) diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..6e05588 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,11 @@ +## Test 
environments +* local R installation, R 4.2.2 on Ubuntu 20.04 +* win-builder: devel, release, oldrelease + +## R CMD check results + +0 errors | 0 warnings | 0 note + +## Package adopted + +* This version solves the issue raised by CRAN maintainers: "warning: function declaration isn’t a prototype". diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index cbe8d59..1b2d309 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -1,5 +1,44 @@ \name{NEWS} \title{lfe news} + +\section{Changes in version 2.9-0}{ + \itemize{ + \item Fix issue #3, "warning: a function declaration without a prototype is deprecated in all versions of C", fixed by @pachadotdev + \item Internal change in code indentation + \item Rename Crowsum to crowsum @pachadotdev + } +} + + +\section{Changes in version 2.8-8}{ + \itemize{ + \item Adapt code for changes in R 4.2 USE_FC_LEN_T, fixed by @pachadotdev + } +} + +\section{Changes in version 2.8-7}{ + \itemize{ + \item Adapt code for changes in R 4.1 and SET_REFCNT, revert previous commit, as suggested by @SebKrantz on Issue https://github.com/sgaure/lfe/issues/49 + } +} + +\section{Changes in version 2.8-6}{ + \itemize{ + \item Package is un-archived from CRAN and temporarily adopted by Matthieu Stigler. + \item Use now doi function + \item CRAN request: do not use 'dontrun' or 'donttest' + \item CRAN request: remove <<- + \item CRAN request: reset options to defaults + \item CRAN request: use at most 2 cores in examples tests etc + \item Change code in waldtest.R to be more robust to variable ordering with multi-way clustering, as suggested by @grantmcdermott + \item Accept Pull request "Catch duplicate (false) reference levels for interacted FEs" @grantmcdermott + \item Fix a sprintf issue, apparently \code{sprintf('\%.8e', 1)} is not legal in future R -devel. 
+ \item Backport changes by CRAN maintainers (2.8.5-1): remove \code{all.tests=TRUE} in \code{parallel::detectCores()} + \item Backport changes by CRAN maintainers (2.8.5-1): delete file \code{examples/lfe-Ex.Rout.save} + \item Fix a few typos with \code{devtools::spell_check()} + \item Added an internal \code{devtools_internal.R} with few testing calls. + } +} \section{Changes in version 2.8-5}{ \itemize{ \item Corrected the vignette about estimable functions. The diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 0000000..5cb8e0c --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,42 @@ +Yogo +Yoshizawa +Windmeijer +Technometrics +Systemen +Szeged +Strohmer +Vershynin +walkthrough +Stata +stata +Schank +Sargan +Sanderson +Salama +Rothenberg +Petersburg +Ouazad +Markussen +mathscinet +Margolis +Lettres +Lovell +Kupper +Kramarz +Koshy +Kolesar +Kaczmarz +Kaasschieter +IZA +Imbens +invertible +interpretable +Hundal +Hedayat +Halperin +Hein +Guimaraes +Gaure +Seidel +SLS +Millo \ No newline at end of file diff --git a/inst/devtools_internal.R b/inst/devtools_internal.R new file mode 100644 index 0000000..09365c4 --- /dev/null +++ b/inst/devtools_internal.R @@ -0,0 +1,33 @@ + + +## Initiate some docs +# usethis::use_cran_comments(open = FALSE) + +## Spell check +devtools::spell_check() + + + +### Tests +devtools::check_win_devel() +devtools::check_win_release() +devtools::check_win_oldrelease() + + +## Check on R-hub +plats_default <- c("windows-x86_64-devel", "ubuntu-gcc-release", "fedora-clang-devel", + "linux-x86_64-rocker-gcc-san") # from: rhub:::default_cran_check_platforms(".") +plats_add <- c("solaris-x86-patched", "solaris-x86-patched-ods") +plats_all <- c(plats_default, plats_add) +devtools::check_rhub(platform = plats_all, + env_vars = c(`_R_CHECK_FORCE_SUGGESTS_` = "false"), ##testhat not working on Solaris by May 2022 + interactive = FALSE) + +## reverse dependencies +# devtools::install_github("r-lib/revdepcheck") 
+revdepcheck::revdep_check(num_workers = 1) + +### Upload to CRAN +devtools::release_checks() +devtools::release() +# direct: devtools::submit_cran() \ No newline at end of file diff --git a/inst/doc/CHANGELOG b/inst/misc/CHANGELOG similarity index 100% rename from inst/doc/CHANGELOG rename to inst/misc/CHANGELOG diff --git a/inst/doc/gen2.R b/inst/misc/gen2.R similarity index 100% rename from inst/doc/gen2.R rename to inst/misc/gen2.R diff --git a/inst/doc/index.html b/inst/misc/index.html similarity index 100% rename from inst/doc/index.html rename to inst/misc/index.html diff --git a/inst/doc/lfeguide.txt b/inst/misc/lfeguide.txt similarity index 100% rename from inst/doc/lfeguide.txt rename to inst/misc/lfeguide.txt diff --git a/inst/doc/test2.lfe b/inst/misc/test2.lfe similarity index 100% rename from inst/doc/test2.lfe rename to inst/misc/test2.lfe diff --git a/lfe.Rproj b/lfe.Rproj new file mode 100644 index 0000000..eaa6b81 --- /dev/null +++ b/lfe.Rproj @@ -0,0 +1,18 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/man/bccorr.Rd b/man/bccorr.Rd index 6e05c56..b2c444d 100644 --- a/man/bccorr.Rd +++ b/man/bccorr.Rd @@ -16,9 +16,9 @@ bccorr( } \arguments{ \item{est}{an object of class '"felm"', the result of a call to -\code{\link{felm}(keepX=TRUE)}.} +\verb{[felm](keepX=TRUE)}.} -\item{alpha}{a data frame, the result of a call to \code{\link{getfe}}.} +\item{alpha}{a data frame, the result of a call to \code{\link[=getfe]{getfe()}}.} \item{corrfactors}{integer or character vector of length 2. The factors to correlate. 
The default is fine if there are only two factors in the model.} @@ -35,17 +35,17 @@ factors are present, or that they are uncorrelated with them.} \value{ \code{bccorr} returns a named integer vector with the following fields: - \item{corr}{the bias corrected correlation.} - \item{v1}{the bias corrected variance for the first factor specified - by \code{corrfactors}.} - \item{v2}{the bias corrected variance for the second factor.} - \item{cov}{the bias corrected covariance between the two factors.} - \item{d1}{the bias correction for the first factor.} - \item{d2}{the bias correction for the second factor.} - \item{d12}{the bias correction for covariance.} +\item{corr}{the bias corrected correlation.} +\item{v1}{the bias corrected variance for the first factor specified +by \code{corrfactors}.} +\item{v2}{the bias corrected variance for the second factor.} +\item{cov}{the bias corrected covariance between the two factors.} +\item{d1}{the bias correction for the first factor.} +\item{d2}{the bias correction for the second factor.} +\item{d12}{the bias correction for covariance.} - The bias corrections have been subtracted from the bias estimates. - E.g. v2 = v2' - d2, where v2' is the biased variance. +The bias corrections have been subtracted from the bias estimates. +E.g. v2 = v2' - d2, where v2' is the biased variance. } \description{ With a model like \eqn{y = X\beta + D\theta + F\psi + \epsilon}, where \eqn{D} @@ -63,7 +63,7 @@ where x is a vector with coordinates drawn uniformly from the set \eqn{\{-1,1\}} More specifically, the expectation is estimated by sample means, i.e. in each sample a vector x is drawn, the equation \eqn{Bv = Cx} is solved by a conjugate gradient method, and the -real number \eqn{x^t Av} is computed. +real number \eqn{x^t Av} is computed. 
There are three bias corrections, for the variances of \eqn{D\theta} (\code{vD}) and \eqn{F\psi} (\code{vF}), and their covariance (\code{vDF}).The correlation is computed as @@ -71,17 +71,17 @@ There are three bias corrections, for the variances of \eqn{D\theta} (\code{vD}) relative tolerance specified by the argument \code{tol}. The covariance bias is estimated to an absolute tolerance in the correlation \code{rho} (conditional on the already bias corrected \code{vD} and \code{vF}) specified by -\code{tol}. The CG algortithm does not need to be exceedingly precise, +\code{tol}. The CG algorithm does not need to be exceedingly precise, it is terminated when the solution reaches a precision which is -sufficient for the chosen precision in \code{vD, vF, vDF}. +sufficient for the chosen precision in \verb{vD, vF, vDF}. -If \code{est} is the result of a weighted \code{\link{felm}} estimation, +If \code{est} is the result of a weighted \code{\link[=felm]{felm()}} estimation, the variances and correlations are weighted too. } \note{ Bias correction for IV-estimates are not supported as of now. -Note that if \code{est} is the result of a call to \code{\link{felm}} +Note that if \code{est} is the result of a call to \code{\link[=felm]{felm()}} with \code{keepX=FALSE} (the default), the correlation will be computed as if the covariates X are independent of the two factors. This will be faster (typically by a factor of approx. 4), and possibly wronger. 
@@ -99,28 +99,28 @@ x <- rnorm(500) x2 <- rnorm(length(x)) ## create individual and firm -id <- factor(sample(40,length(x),replace=TRUE)) -firm <- factor(sample(30,length(x),replace=TRUE,prob=c(2,rep(1,29)))) -foo <- factor(sample(20,length(x),replace=TRUE)) +id <- factor(sample(40, length(x), replace = TRUE)) +firm <- factor(sample(30, length(x), replace = TRUE, prob = c(2, rep(1, 29)))) +foo <- factor(sample(20, length(x), replace = TRUE)) ## effects id.eff <- rnorm(nlevels(id)) firm.eff <- rnorm(nlevels(firm)) foo.eff <- rnorm(nlevels(foo)) ## left hand side -y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + foo.eff[foo] + rnorm(length(x)) +y <- x + 0.25 * x2 + id.eff[id] + firm.eff[firm] + foo.eff[foo] + rnorm(length(x)) # make a data frame -fr <- data.frame(y,x,x2,id,firm,foo) +fr <- data.frame(y, x, x2, id, firm, foo) ## estimate and print result -est <- felm(y ~ x+x2|id+firm+foo, data=fr, keepX=TRUE) +est <- felm(y ~ x + x2 | id + firm + foo, data = fr, keepX = TRUE) # find bias corrections bccorr(est) } \references{ Gaure, S. (2014), \cite{Correlation bias correction in two-way - fixed-effects linear regression}, Stat 3(1):379:390, 2014. +fixed-effects linear regression}, Stat 3(1):379:390, 2014. } \seealso{ -\code{\link{fevcov}} +\code{\link[=fevcov]{fevcov()}} } \concept{Limited Mobility Bias} diff --git a/man/btrap.Rd b/man/btrap.Rd index 49b27a8..4a23fcc 100644 --- a/man/btrap.Rd +++ b/man/btrap.Rd @@ -17,14 +17,14 @@ btrap( ) } \arguments{ -\item{alpha}{data frame returned from \code{\link{getfe}}} +\item{alpha}{data frame returned from \code{\link[=getfe]{getfe()}}} \item{obj}{object of class \code{"felm"}, usually, a result of a call to -\code{\link{felm}}} +\code{\link[=felm]{felm()}}} \item{N}{integer. The number of bootstrap iterations} -\item{ef}{function. An estimable function such as in \code{\link{getfe}}. +\item{ef}{function. An estimable function such as in \code{\link[=getfe]{getfe()}}. 
The default is to use the one used on \code{alpha}} \item{eps}{double. Tolerance for centering, as in getfe} @@ -44,11 +44,11 @@ errors filled in. } \description{ Bootstrap standard errors for the group fixed effects which were swept out -during an estimation with \code{\link{felm}}. +during an estimation with \code{\link[=felm]{felm()}}. } \details{ The bootstrapping is done in parallel if \code{threads > 1}. -\code{\link{btrap}} is run automatically from \code{\link{getfe}} if +\code{\link[=btrap]{btrap()}} is run automatically from \code{\link[=getfe]{getfe()}} if \code{se=TRUE} is specified. To save some overhead, the individual iterations are grouped together, the memory available for this grouping is fetched with \code{getOption('lfe.bootmem')}, which is initialized upon @@ -57,49 +57,50 @@ loading of \pkg{lfe} to \code{options(lfe.bootmem=500)} (MB). If \code{robust=TRUE}, heteroskedastic robust standard errors are estimated. If \code{robust=FALSE} and \code{cluster=TRUE}, clustered standard errors with the cluster specified to \code{felm()} are estimated. If \code{cluster} -is a factor, it is used for the cluster definition. \code{cluster may} also +is a factor, it is used for the cluster definition. \verb{cluster may} also be a list of factors. 
} \examples{ -oldopts <- options(lfe.threads=2) +oldopts <- options("lfe.threads") +options(lfe.threads = 2) ## create covariates x <- rnorm(3000) x2 <- rnorm(length(x)) ## create individual and firm -id <- factor(sample(700,length(x),replace=TRUE)) -firm <- factor(sample(300,length(x),replace=TRUE)) +id <- factor(sample(700, length(x), replace = TRUE)) +firm <- factor(sample(300, length(x), replace = TRUE)) ## effects id.eff <- rlnorm(nlevels(id)) firm.eff <- rexp(nlevels(firm)) ## left hand side -y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) +y <- x + 0.25 * x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) ## estimate and print result -est <- felm(y ~ x+x2 | id + firm) +est <- felm(y ~ x + x2 | id + firm) summary(est) ## extract the group effects alpha <- getfe(est) head(alpha) ## bootstrap standard errors -head(btrap(alpha,est)) +head(btrap(alpha, est)) ## bootstrap some differences -ef <- function(v,addnames) { - w <- c(v[2]-v[1],v[3]-v[2],v[3]-v[1]) - if(addnames) { - names(w) <-c('id2-id1','id3-id2','id3-id1') - attr(w,'extra') <- list(note=c('line1','line2','line3')) +ef <- function(v, addnames) { + w <- c(v[2] - v[1], v[3] - v[2], v[3] - v[1]) + if (addnames) { + names(w) <- c("id2-id1", "id3-id2", "id3-id1") + attr(w, "extra") <- list(note = c("line1", "line2", "line3")) } w } # check that it's estimable -is.estimable(ef,est$fe) +is.estimable(ef, est$fe) -head(btrap(alpha,est,ef=ef)) +head(btrap(alpha, est, ef = ef)) options(oldopts) } diff --git a/man/cgsolve.Rd b/man/cgsolve.Rd index 5c7a5a2..8d4c044 100644 --- a/man/cgsolve.Rd +++ b/man/cgsolve.Rd @@ -25,8 +25,8 @@ A solution \eqn{x} of the linear system \eqn{A x = b} is returned. \description{ \code{cgsolve} uses a conjugate gradient algorithm to solve the linear system \eqn{A x = b} where \eqn{A} is a symmetric matrix. 
\code{cgsolve} is -used internally in \pkg{lfe} in the routines \code{\link{fevcov}} and -\code{\link{bccorr}}, but has been made public because it might be useful +used internally in \pkg{lfe} in the routines \code{\link[=fevcov]{fevcov()}} and +\code{\link[=bccorr]{bccorr()}}, but has been made public because it might be useful for other purposes as well. } \details{ @@ -50,32 +50,32 @@ tolerance. The termination criterion for \code{cgsolve} is the one from Preconditioning is currently not supported. If \code{A} is a function, the test for symmetry is performed by drawing two -random vectors \code{x,y}, and testing whether \eqn{|(Ax, y) - (x, Ay)| < +random vectors \verb{x,y}, and testing whether \eqn{|(Ax, y) - (x, Ay)| < 10^{-6} sqrt((||Ax||^2 + ||Ay||^2)/N)}, where \eqn{N} is the vector length. Thus, the test is neither deterministic nor perfect. } \examples{ - N <- 100000 +N <- 100000 # create some factors - f1 <- factor(sample(34000,N,replace=TRUE)) - f2 <- factor(sample(25000,N,replace=TRUE)) +f1 <- factor(sample(34000, N, replace = TRUE)) +f2 <- factor(sample(25000, N, replace = TRUE)) # a matrix of dummies, which probably is rank deficient - B <- makeDmatrix(list(f1,f2)) - dim(B) +B <- makeDmatrix(list(f1, f2)) +dim(B) # create a right hand side - b <- as.matrix(B \%*\% rnorm(ncol(B))) +b <- as.matrix(B \%*\% rnorm(ncol(B))) # solve B' B x = B' b - sol <- cgsolve(crossprod(B), crossprod(B, b), eps=-1e-2) - #verify solution - sqrt(sum((B \%*\% sol - b)^2)) +sol <- cgsolve(crossprod(B), crossprod(B, b), eps = -1e-2) +# verify solution +sqrt(sum((B \%*\% sol - b)^2)) } \references{ Kaasschieter, E. (1988) \cite{A practical termination criterion for the conjugate gradient method}, BIT Numerical Mathematics, -28(2):308-322. \url{http://link.springer.com/article/10.1007\%2FBF01934094} +28(2):308-322. 
\url{https://link.springer.com/article/10.1007/BF01934094} } \seealso{ -\code{\link{kaczmarz}} +\code{\link[=kaczmarz]{kaczmarz()}} } diff --git a/man/chainsubset.Rd b/man/chainsubset.Rd index fed49a2..1b57770 100644 --- a/man/chainsubset.Rd +++ b/man/chainsubset.Rd @@ -33,18 +33,17 @@ find the ones with \code{x > mean(y)}. The last \code{mean(y)} is now condition \note{ Some trickery is done to make this work directly in the subset argument of functions like \code{felm()} and \code{lm()}. It might possibly fail with an error message in some situations. -If this happens, it should be done in two steps: \code{ss <- eval(chainsubset(...),data); -lm(...,data=data, subset=ss)}. In particular, the arguments are taken literally, -constructions like \code{function(...) {chainsubset(...)}} or \code{a <- quote(x < y); chainsubset(a)} do +If this happens, it should be done in two steps: \verb{ss <- eval(chainsubset(...),data); lm(...,data=data, subset=ss)}. In particular, the arguments are taken literally, +constructions like \code{function(...) {chainsubset(...)}} or \verb{a <- quote(x < y); chainsubset(a)} do not work, but \code{do.call(chainsubset,list(a))} does. 
} \examples{ set.seed(48) N <- 10000 -dat <- data.frame(y=rnorm(N), x=rnorm(N)) +dat <- data.frame(y = rnorm(N), x = rnorm(N)) # It's not the same as and'ing the conditions: -felm(y ~ x,data=dat,subset=chainsubset(x < mean(y), y < 2*mean(x))) -felm(y ~ x,data=dat,subset=chainsubset(y < 2*mean(x), x < mean(y))) -felm(y ~ x,data=dat,subset=(x < mean(y)) & (y < 2*mean(x))) -lm(y ~ x, data=dat, subset=chainsubset(x < mean(y), x > mean(y))) +felm(y ~ x, data = dat, subset = chainsubset(x < mean(y), y < 2 * mean(x))) +felm(y ~ x, data = dat, subset = chainsubset(y < 2 * mean(x), x < mean(y))) +felm(y ~ x, data = dat, subset = (x < mean(y)) & (y < 2 * mean(x))) +lm(y ~ x, data = dat, subset = chainsubset(x < mean(y), x > mean(y))) } diff --git a/man/compfactor.Rd b/man/compfactor.Rd index c5e470f..e0bb33c 100644 --- a/man/compfactor.Rd +++ b/man/compfactor.Rd @@ -37,14 +37,14 @@ about this in one of the vignettes. \examples{ ## create two factors -f1 <- factor(sample(300,400,replace=TRUE)) -f2 <- factor(sample(300,400,replace=TRUE)) +f1 <- factor(sample(300, 400, replace = TRUE)) +f2 <- factor(sample(300, 400, replace = TRUE)) ## find the components -cf <- compfactor(list(f1=f1,f2=f2)) +cf <- compfactor(list(f1 = f1, f2 = f2)) ## show the third largest component -fr <- data.frame(f1,f2,cf) -fr[cf==3,] +fr <- data.frame(f1, f2, cf) +fr[cf == 3, ] } diff --git a/man/condfstat.Rd b/man/condfstat.Rd index 4903eea..9971ffd 100644 --- a/man/condfstat.Rd +++ b/man/condfstat.Rd @@ -9,9 +9,9 @@ condfstat(object, type = "default", quantiles = 0, bN = 100L) } \arguments{ \item{object}{object of class \code{"felm"}, a result of a call to -\code{\link{felm}}.} +\code{\link[=felm]{felm()}}.} -\item{type}{character. Error structure. Passed to \code{\link{waldtest}}. If +\item{type}{character. Error structure. Passed to \code{\link[=waldtest]{waldtest()}}. If \code{NULL}, both iid and robust Fs are returned.} \item{quantiles}{numeric. 
Quantiles for bootstrap.} @@ -23,7 +23,7 @@ A p x k matrix, where k is the number of endogenous variables. Each row are the conditional F statistics on a residual equation as described in \cite{Sanderson and Windmeijer (2014)}, for a certain error structure. The default is to use iid, or cluster if a cluster was specified to -\code{\link{felm}}. The third choice is \code{'robust'}, for heteroskedastic +\code{\link[=felm]{felm()}}. The third choice is \code{'robust'}, for heteroskedastic errors. If \code{type=NULL}, iid and robust Fs are returned, and cluster, if that was specified to \code{felm}. @@ -60,7 +60,7 @@ are projected out of the equations before doing the bootstrap. } \note{ Please note that \code{condfstat} does not work with the old syntax -for IV in \code{\link{felm}(...,iv=)}. The new multipart syntax must be +for IV in \verb{[felm](...,iv=)}. The new multipart syntax must be used. } \examples{ @@ -70,33 +70,33 @@ z2 <- rnorm(length(z1)) u <- rnorm(length(z1)) # make x1, x2 correlated with errors u -x1 <- z1 + z2 + 0.2*u + rnorm(length(z1)) -x2 <- z1 + 0.94*z2 - 0.3*u + rnorm(length(z1)) +x1 <- z1 + z2 + 0.2 * u + rnorm(length(z1)) +x2 <- z1 + 0.94 * z2 - 0.3 * u + rnorm(length(z1)) y <- x1 + x2 + u est <- felm(y ~ 1 | 0 | (x1 | x2 ~ z1 + z2)) summary(est) \dontrun{ -summary(est$stage1, lhs='x1') -summary(est$stage1, lhs='x2') +summary(est$stage1, lhs = "x1") +summary(est$stage1, lhs = "x2") } # the joint significance of the instruments in both the first stages are ok: -t(sapply(est$stage1$lhs, function(lh) waldtest(est$stage1, ~z1|z2, lhs=lh))) -# everything above looks fine, t-tests for instruments, +t(sapply(est$stage1$lhs, function(lh) waldtest(est$stage1, ~ z1 | z2, lhs = lh))) +# everything above looks fine, t-tests for instruments, # as well as F-tests for excluded instruments in the 1st stages. 
# The conditional F-test reveals that the instruments are jointly weak # (it's close to being only one instrument, z1+z2, for both x1 and x2) -condfstat(est, quantiles=c(0.05, 0.95)) +condfstat(est, quantiles = c(0.05, 0.95)) } \references{ Sanderson, E. and F. Windmeijer (2014) \cite{A weak instrument F-test in linear IV models with multiple endogenous variables}, Journal of Econometrics, 2015. -\url{http://www.sciencedirect.com/science/article/pii/S0304407615001736} +\url{https://www.sciencedirect.com/science/article/pii/S0304407615001736} Stock, J.H. and M. Yogo (2004) \cite{Testing for weak instruments in linear -IV regression}, \url{http://ssrn.com/abstract=1734933} in +IV regression}, \url{https://www.ssrn.com/abstract=1734933} in \cite{Identification and inference for econometric models: Essays in honor of Thomas Rothenberg}, 2005. } diff --git a/man/demeanlist.Rd b/man/demeanlist.Rd index ddc7dd3..9a74b35 100644 --- a/man/demeanlist.Rd +++ b/man/demeanlist.Rd @@ -42,30 +42,30 @@ vector is centered, but not more often than every \code{progress} minutes).} This may improve convergence.} \item{means}{logical. Should the means instead of the demeaned matrix be - returned? Setting \code{means=TRUE} will return \code{mtx - demeanlist(mtx,...)}, +returned? Setting \code{means=TRUE} will return \code{mtx - demeanlist(mtx,...)}, but without the extra copy.} \item{weights}{numeric. For weighted demeaning.} \item{scale}{logical. Specify scaling for weighted demeaning.} -\item{na.rm}{logical which indicates what should happen when the data +\item{na.rm}{logical which indicates what should happen when the data contain \code{NA}s. If TRUE, rows in the input \code{mtx} are removed prior to centering. If FALSE, they are kept, leading to entire groups becoming NA in the output.} -\item{attrs}{list. List of attributes which should be attached to the output. +\item{attrs}{list. List of attributes which should be attached to the output. 
Used internally.} } \value{ -If \code{mtx} is a matrix, a matrix of the same shape, possibly with +If \code{mtx} is a matrix, a matrix of the same shape, possibly with column \code{icpt} deleted. If \code{mtx} is a list of vectors and matrices, a list of the same length is returned, with the same vector and matrix-pattern, but the -matrices have the column \code{icpt} deleted. +matrices have the column \code{icpt} deleted. -If \code{mtx} is a \code{'data.frame'}, a \code{'data.frame'} +If \code{mtx} is a \code{'data.frame'}, a \code{'data.frame'} with the same names is returned; the \code{icpt} argument is ignored. If \code{na.rm} is specified, the return value has an attribute \code{'na.rm'} with a vector of @@ -75,16 +75,16 @@ memory usage is an issue and many runs are done, one might consider removing NAs } \description{ Uses the method of alternating projections to centre - a (model) matrix on multiple groups, as specified by a list of factors. - This function is called by \code{\link{felm}}, but it has been - made available as standalone in case it's needed. In particular, if +a (model) matrix on multiple groups, as specified by a list of factors. +This function is called by \code{\link[=felm]{felm()}}, but it has been +made available as standalone in case it's needed. In particular, if one does not need transformations provided by R-formulas but have the covariates present as a matrix or a data.frame, a substantial amount of time can be saved in the centering. } \details{ For each column \code{y} in \code{mtx}, the equivalent of the following centering is performed, with \code{cy} as the result. -\preformatted{ +\preformatted{ cy <- y; oldy <- y-1 while(sqrt(sum((cy-oldy)**2)) >= eps) { oldy <- cy @@ -100,7 +100,7 @@ covariate \code{x} and a factor \code{f}, it is like projecting out the interaction \code{x:f}. 
The \code{'x'} attribute can also be a matrix of column vectors, in this case it can be beneficial to orthogonalize the columns, either with a stabilized Gram-Schmidt method, or with the simple -method \code{x \%*\% solve(chol(crossprod(x)))}. +method \verb{x \\\%*\\\% solve(chol(crossprod(x)))}. The \code{weights} argument is used if a weighted projection is computed. If \eqn{W} is the diagonal matrix with \code{weights} on the @@ -117,13 +117,13 @@ the input should be scaled by \eqn{W}, and \code{scale[2]} specifies whether the output should be scaled by \eqn{W^{-1}}. This is just a convenience to save some memory copies in other functions in the package. -Note that for certain large datasets the overhead in \code{\link{felm}} +Note that for certain large datasets the overhead in \code{\link[=felm]{felm()}} is large compared to the time spent in \code{demeanlist}. If the data are present directly without having to use the formula-interface to \code{felm} for transformations etc, it is possible to run \code{demeanlist} directly on a matrix or \code{"data.frame"} and do the OLS "manually", e.g. with something like -\code{cx <- demeanlist(x,...); beta <- solve(crossprod(cx), crossprod(cx,y))} +\verb{cx <- demeanlist(x,...); beta <- solve(crossprod(cx), crossprod(cx,y))} In some applications it is known that a single centering iteration is sufficient. In particular, if \code{length(fl)==1} and there is no @@ -131,8 +131,7 @@ interaction attribute \code{x}. In this case the centering algorithm is terminated after the first iteration. There may be other cases, e.g. if there is a single factor with a matrix \code{x} with orthogonal columns. If you have such prior knowledge, it is possible to force termination after -the first iteration by adding an attribute \code{attr(fl, 'oneiter') <- -TRUE}. Convergence will be reached in the second iteration anyway, but +the first iteration by adding an attribute \code{attr(fl, 'oneiter') <- TRUE}. 
Convergence will be reached in the second iteration anyway, but you save one iteration, i.e. you double the speed. } \note{ @@ -143,23 +142,24 @@ Trans. Amer. Math. Soc. 355 pp 3433-3461 (2003). \code{demeanlist} will use an in place transform to save memory, provided the \code{mtx} argument is unnamed. Thus, as always in R, you shouldn't use temporary variables -like \code{tmp <- fun(x[v,]); bar <- demeanlist(tmp,...); rm(tmp)}, it will be much better to +like \verb{tmp <- fun(x[v,]); bar <- demeanlist(tmp,...); rm(tmp)}, it will be much better to do \code{bar <- demeanlist(fun(x[v,]),...)}. However, demeanlist allows a construction like \code{bar <- demeanlist(unnamed(tmp),...)} which will use an in place transformation, i.e. tmp will be modified, quite contrary to the usual semantics of R. } \examples{ -oldopts <- options(lfe.threads=1) +oldopts <- options("lfe.threads") +options(lfe.threads = 2) ## create a matrix -mtx <- data.frame(matrix(rnorm(999),ncol=3)) +mtx <- data.frame(matrix(rnorm(999), ncol = 3)) # a list of factors -rgb <- c('red','green','blue') -fl <- replicate(4, factor(sample(rgb,nrow(mtx),replace=TRUE)), simplify=FALSE) -names(fl) <- paste('g',seq_along(fl),sep='') +rgb <- c("red", "green", "blue") +fl <- replicate(4, factor(sample(rgb, nrow(mtx), replace = TRUE)), simplify = FALSE) +names(fl) <- paste("g", seq_along(fl), sep = "") # centre on all means -mtx0 <- demeanlist(mtx,fl) -head(data.frame(mtx0,fl)) +mtx0 <- demeanlist(mtx, fl) +head(data.frame(mtx0, fl)) # verify that the group means for the columns are zero -lapply(fl, function(f) apply(mtx0,2,tapply,f,mean)) +lapply(fl, function(f) apply(mtx0, 2, tapply, f, mean)) options(oldopts) } diff --git a/man/efactory.Rd b/man/efactory.Rd index 69df37c..0bd2f5a 100644 --- a/man/efactory.Rd +++ b/man/efactory.Rd @@ -8,14 +8,14 @@ efactory(obj, opt = "ref", ...) 
} \arguments{ \item{obj}{object of class \code{"felm"}, usually, a result of a call to -\code{\link{felm}}.} +\code{\link[=felm]{felm()}}.} \item{opt}{character. Which type of estimable function.} \item{...}{various.} } \value{ -A function of two parameters \code{function(v,addnames)}. An +A function of two parameters \verb{function(v,addnames)}. An estimable function (i.e. the result is the vector of some length \code{N}) of the input vector \code{v}. When \code{addnames==TRUE} the returned vector should have names, and optionally an attribute \code{"extra"} which is a @@ -28,7 +28,7 @@ Creates an estimable function for a factor-structure. \details{ There are several possibilities for the input parameter \code{opt}. \itemize{ \item \code{"ref"} yields an estimable function which is similar -to the default one in \code{\link{lm}}, one reference is forced to \code{0} +to the default one in \code{\link[=lm]{lm()}}, one reference is forced to \code{0} in each connected component. \item \code{"zm"} Similar to \code{"ref"}, but the factor which does not contain a reference is made to have zero mean, and an intercept is added. \item \code{"zm2"} Similar to \code{"zm"}, but both @@ -61,19 +61,20 @@ I.e. if the model is \code{y ~ ... 
|id + firm}, we have } \examples{ -oldopts <- options(lfe.threads=1) -id <- factor(sample(5000,50000,replace=TRUE)) -firm <- factor(sample(3000,50000,replace=TRUE)) -fl <- list(id=id,firm=firm) -obj <- list(fe=fl,cfactor=compfactor(fl)) +oldopts <- options("lfe.threads") +options(lfe.threads = 2) +id <- factor(sample(5000, 50000, replace = TRUE)) +firm <- factor(sample(3000, 50000, replace = TRUE)) +fl <- list(id = id, firm = firm) +obj <- list(fe = fl, cfactor = compfactor(fl)) ## the trivial least-norm transformtion, which by the way is non-estimable -print(ef <- efactory(obj,'ln')) -is.estimable(ef,fl) +print(ef <- efactory(obj, "ln")) +is.estimable(ef, fl) ## then the default -print(ef <- efactory(obj,'ref')) -is.estimable(ef,fl) +print(ef <- efactory(obj, "ref")) +is.estimable(ef, fl) # get the names of the coefficients, i.e. the nm-variable in the function -head(evalq(nm,environment(ef))) +head(evalq(nm, environment(ef))) options(oldopts) } diff --git a/man/felm.Rd b/man/felm.Rd index a56965f..89f3499 100644 --- a/man/felm.Rd +++ b/man/felm.Rd @@ -55,14 +55,14 @@ squares is used with weights \code{weights} (that is, minimizing \item{...}{other arguments. \itemize{ -\item \code{cmethod} character. Which clustering method to use. Known +\item \code{cmethod} character. Which clustering method to use. Known arguments are \code{'cgm'} (the default), \code{'cgm2'} (or \code{'reghdfe'}, -its alias). These alternate methods will generally +its alias). These alternate methods will generally yield equivalent results, except in the case of multiway clustering with few -clusters along at least one dimension. +clusters along at least one dimension. \item \code{keepX} logical. To include a copy of the expanded data matrix in -the return value, as needed by \code{\link{bccorr}} and \code{\link{fevcov}} +the return value, as needed by \code{\link[=bccorr]{bccorr()}} and \code{\link[=fevcov]{fevcov()}} for proper limited mobility bias correction. \item \code{keepCX} logical. 
Keep a copy of the centred expanded data matrix @@ -83,14 +83,13 @@ correct, this should only have an effect when the clustering factors have very few levels. \item \code{kclass} character. For use with instrumental variables. Use a -k-class estimator rather than 2SLS/IV. Currently, the values \code{'nagar', -'b2sls', 'mb2sls', 'liml'} are accepted, where the names are from +k-class estimator rather than 2SLS/IV. Currently, the values \verb{'nagar', 'b2sls', 'mb2sls', 'liml'} are accepted, where the names are from \cite{Kolesar et al (2014)}, as well as a numeric value for the 'k' in k-class. With \code{kclass='liml'}, \code{felm} also accepts the argument -\code{fuller=}, for using a Fuller adjustment of the +\verb{fuller=}, for using a Fuller adjustment of the liml-estimator. -\item \code{Nboot, bootexpr, bootcluster} Since \code{felm} has quite a bit +\item \verb{Nboot, bootexpr, bootcluster} Since \code{felm} has quite a bit of overhead in the creation of the model matrix, if one wants confidence intervals for some function of the estimated parameters, it is possible to bootstrap internally in \code{felm}. That is, the model matrix is resampled @@ -109,7 +108,7 @@ needed in the bootstrap. If you need the covariance matrices in the full estimate, but not in the bootstrap, you can specify it in an attribute \code{"boot"} as \code{nostats=structure(FALSE, boot=TRUE)}. -\item \code{iv, clustervar} deprecated. These arguments will be removed at +\item \verb{iv, clustervar} deprecated. These arguments will be removed at a later time, but are still supported in this field. Users are \emph{STRONGLY} encouraged to use multipart formulas instead. In particular, not all functionality is supported with the deprecated syntax; @@ -168,7 +167,7 @@ the unrestricted model.} \item{X}{matrix. The expanded data matrix, i.e. from the first part of the formula. To save memory with large datasets, it is only included if \code{felm(keepX=TRUE)} is specified. 
Must be included if -\code{\link{bccorr}} or \code{\link{fevcov}} is to be used for correcting +\code{\link[=bccorr]{bccorr()}} or \code{\link[=fevcov]{fevcov()}} is to be used for correcting limited mobility bias. } \item{cX, cY}{matrix. The centred expanded data matrix. Only included if @@ -187,22 +186,20 @@ remaining coefficients with OLS. This function is intended for use with large datasets with multiple group effects of large cardinality. If dummy-encoding the group effects results in a manageable number of coefficients, you are probably better off by using -\code{\link{lm}}. +\code{\link[=lm]{lm()}}. The formula specification is a response variable followed by a four part formula. The first part consists of ordinary covariates, the second part consists of factors to be projected out. The third part is an IV-specification. The fourth part is a cluster specification for the -standard errors. I.e. something like \code{y ~ x1 + x2 | f1 + f2 | (Q|W ~ -x3+x4) | clu1 + clu2} where \code{y} is the response, \code{x1,x2} are -ordinary covariates, \code{f1,f2} are factors to be projected out, \code{Q} +standard errors. I.e. something like \code{y ~ x1 + x2 | f1 + f2 | (Q|W ~ x3+x4) | clu1 + clu2} where \code{y} is the response, \verb{x1,x2} are +ordinary covariates, \verb{f1,f2} are factors to be projected out, \code{Q} and \code{W} are covariates which are instrumented by \code{x3} and -\code{x4}, and \code{clu1,clu2} are factors to be used for computing cluster +\code{x4}, and \verb{clu1,clu2} are factors to be used for computing cluster robust standard errors. Parts that are not used should be specified as \code{0}, except if it's at the end of the formula, where they can be omitted. The parentheses are needed in the third part since \code{|} has -higher precedence than \code{~}. Multiple left hand sides like \code{y|w|x ~ -x1 + x2 |f1+f2|...} are allowed. +higher precedence than \code{~}. Multiple left hand sides like \code{y|w|x ~ x1 + x2 |f1+f2|...} are allowed. 
Interactions between a covariate \code{x} and a factor \code{f} can be
projected out with the syntax \code{x:f}. The terms in the second and
@@ -214,17 +211,19 @@ parser does not keep the order. This means that in interactions, the factor
 a factor. I.e. in \code{y ~ x1 | x:f1 + f2}, the \code{f1} must be a factor,
 whereas it will work as expected if \code{f2} is an integer vector.
 
-In older versions of \pkg{lfe} the syntax was \code{felm(y ~ x1 + x2 + G(f1)
-+ G(f2), iv=list(Q ~ x3+x4, W ~ x3+x4), clustervar=c('clu1','clu2'))}. This
+In older versions of \pkg{lfe} the syntax was
+\code{felm(y ~ x1 + x2 + G(f1) + G(f2),
+iv=list(Q ~ x3+x4, W ~ x3+x4),
+clustervar=c('clu1','clu2'))}. This
 syntax still works, but yields a warning. Users are \emph{strongly}
 encouraged to change to the new multipart formula syntax. The old syntax
 will be removed at a later time.
 
 The standard errors are adjusted for the reduced degrees of freedom coming
 from the dummies which are implicitly present. (An exception occurs in the
-case of clustered standard errors and, specifically, where clusters are
-nested within fixed effects; see
-\href{https://github.com/sgaure/lfe/issues/1#issuecomment-528643802}{here}.)
+case of clustered standard errors and, specifically, where clusters are
+nested within fixed effects; see
+\href{https://github.com/sgaure/lfe/issues/1#issuecomment-528643802}{here}.)
 In the case of two factors, the exact number of implicit dummies is easy to
 compute. If there are more factors, the number of dummies is estimated by
 assuming there's one
@@ -242,33 +241,33 @@ The \code{contrasts} argument is similar to the one in \code{lm()}, it is
 used for factors in the first part of the formula. The factors in the second
 part are analyzed as part of a possible subsequent \code{getfe()} call.
-The \code{cmethod} argument may affect the clustered covariance matrix (and -thus regressor standard errors), either directly or via adjustments to a +The \code{cmethod} argument may affect the clustered covariance matrix (and +thus regressor standard errors), either directly or via adjustments to a degrees of freedom scaling factor. In particular, Cameron, Gelbach and Miller -(CGM2011, sec. 2.3) describe two possible small cluster corrections that are +(CGM2011, sec. 2.3) describe two possible small cluster corrections that are relevant in the case of multiway clustering. \itemize{ -\item The first approach adjusts each component of the cluster-robust -variance estimator (CRVE) by its own \eqn{c_i} adjustment factor. For -example, the first component (with \eqn{G} clusters) is adjusted by -\eqn{c_1=\frac{G}{G-1}\frac{N-1}{N-K}}{c_1 = G/(G-1)*(N-1)/(N-K)}, +\item The first approach adjusts each component of the cluster-robust +variance estimator (CRVE) by its own \eqn{c_i} adjustment factor. For +example, the first component (with \eqn{G} clusters) is adjusted by +\eqn{c_1=\frac{G}{G-1}\frac{N-1}{N-K}}{c_1 = G/(G-1)*(N-1)/(N-K)}, the second component (with \eqn{H} clusters) is adjusted by \eqn{c_2=\frac{H}{H-1}\frac{N-1}{N-K}}{c_2 = H/(H-1)*(N-1)/(N-K)}, etc. \item The second approach applies the same adjustment to all CRVE components: \eqn{c=\frac{J}{J-1}\frac{N-1}{N-K}}{c = J/(J-1)*(N-1)/(N-K)}, where -\eqn{J=\min(G,H)}{J=min(G,H)} in the case of two-way clustering, for example. +\eqn{J=\min(G,H)}{J=min(G,H)} in the case of two-way clustering, for example. } -Any differences resulting from these two approaches are likely to be minor, -and they will obviously yield exactly the same results when there is only one -cluster dimension. Still, CGM2011 adopt the former approach in their own -paper and simulations. This is also the default method that \code{felm} uses -(i.e. \code{cmethod = 'cgm'}). 
However, the latter approach has since been -adopted by several other packages that allow for robust inference with +Any differences resulting from these two approaches are likely to be minor, +and they will obviously yield exactly the same results when there is only one +cluster dimension. Still, CGM2011 adopt the former approach in their own +paper and simulations. This is also the default method that \code{felm} uses +(i.e. \code{cmethod = 'cgm'}). However, the latter approach has since been +adopted by several other packages that allow for robust inference with multiway clustering. This includes the popular Stata package -\href{http://scorreia.com/software/reghdfe/}{reghdfe}, as well as the -\href{https://github.com/matthieugomez/FixedEffectModels.jl}{FixedEffectModels.jl} +\href{http://scorreia.com/software/reghdfe/}{reghdfe}, as well as the +\href{https://github.com/matthieugomez/FixedEffectModels.jl}{FixedEffectModels.jl} implementation in Julia. To match results from these packages exactly, use \code{cmethod = 'cgm2'} (or its alias, \code{cmethod = 'reghdfe'}). It is -possible that some residual differences may still remain; see discussion +possible that some residual differences may still remain; see discussion \href{https://github.com/sgaure/lfe/issues/1#issuecomment-530561314}{here}. The old syntax with a single part formula with the \code{G()} syntax for the @@ -296,73 +295,89 @@ in the data frame instead of the local environment where they are defined. } \examples{ -oldopts <- options(lfe.threads=1) +## Default is to use all cores. We'll limit it to 2 for this example. 
+oldopts <- options("lfe.threads") +options(lfe.threads = 2) ## Simulate data +set.seed(42) +n <- 1e3 + +d <- data.frame( + # Covariates + x1 = rnorm(n), + x2 = rnorm(n), + # Individuals and firms + id = factor(sample(20, n, replace = TRUE)), + firm = factor(sample(13, n, replace = TRUE)), + # Noise + u = rnorm(n) +) + +# Effects for individuals and firms +id.eff <- rnorm(nlevels(d$id)) +firm.eff <- rnorm(nlevels(d$firm)) -# Covariates -x <- rnorm(1000) -x2 <- rnorm(length(x)) -# Individuals and firms -id <- factor(sample(20,length(x),replace=TRUE)) -firm <- factor(sample(13,length(x),replace=TRUE)) -# Effects for them -id.eff <- rnorm(nlevels(id)) -firm.eff <- rnorm(nlevels(firm)) # Left hand side -u <- rnorm(length(x)) -y <- x + 0.5*x2 + id.eff[id] + firm.eff[firm] + u +d$y <- d$x1 + 0.5 * d$x2 + id.eff[d$id] + firm.eff[d$firm] + d$u ## Estimate the model and print the results -est <- felm(y ~ x + x2 | id + firm) +est <- felm(y ~ x1 + x2 | id + firm, data = d) summary(est) - -\dontrun{ # Compare with lm -summary(lm(y ~ x + x2 + id + firm-1))} +summary(lm(y ~ x1 + x2 + id + firm - 1, data = d)) ## Example with 'reverse causation' (IV regression) # Q and W are instrumented by x3 and the factor x4. 
-x3 <- rnorm(length(x)) -x4 <- sample(12,length(x),replace=TRUE) -Q <- 0.3*x3 + x + 0.2*x2 + id.eff[id] + 0.3*log(x4) - 0.3*y + rnorm(length(x),sd=0.3) -W <- 0.7*x3 - 2*x + 0.1*x2 - 0.7*id.eff[id] + 0.8*cos(x4) - 0.2*y+ rnorm(length(x),sd=0.6) +d$x3 <- rnorm(n) +d$x4 <- sample(12, n, replace = TRUE) +d$Q <- 0.3 * d$x3 + d$x1 + 0.2 * d$x2 + id.eff[d$id] + 0.3 * log(d$x4) - 0.3 * d$y + + rnorm(n, sd = 0.3) +d$W <- 0.7 * d$x3 - 2 * d$x1 + 0.1 * d$x2 - 0.7 * id.eff[d$id] + 0.8 * cos(d$x4) - + 0.2 * d$y + rnorm(n, sd = 0.6) + # Add them to the outcome variable -y <- y + Q + W +d$y <- d$y + d$Q + d$W ## Estimate the IV model and report robust SEs -ivest <- felm(y ~ x + x2 | id + firm | (Q|W ~ x3 + factor(x4))) -summary(ivest, robust=TRUE) +ivest <- felm(y ~ x1 + x2 | id + firm | (Q | W ~ x3 + factor(x4)), data = d) +summary(ivest, robust = TRUE) condfstat(ivest) - -\dontrun{ # Compare with the not instrumented fit: -summary(felm(y ~ x + x2 + Q + W | id + firm))} +summary(felm(y ~ x1 + x2 + Q + W | id + firm, data = d)) ## Example with multiway clustering # Create a large cluster group (500 clusters) and a small one (20 clusters) -cl1 <- factor(sample(rep(1:500, length.out=length(x)))) -cl2 <- factor(sample(rep(1:20, length.out=length(x)))) -# Function for adding clustered noise to our outcome variable +d$cl1 <- factor(sample(rep(1:500, length.out = n))) +d$cl2 <- factor(sample(rep(1:20, length.out = n))) +# Function for adding clustered noise to our outcome variable cl_noise <- function(cl) { - obs_per_cluster <- length(x)/nlevels(cl) - unlist(replicate(nlevels(cl), rnorm(obs_per_cluster, mean=rnorm(1), sd=runif(1)), simplify=FALSE)) + obs_per_cluster <- n / nlevels(cl) + unlist(replicate(nlevels(cl), + rnorm(obs_per_cluster, mean = rnorm(1), sd = runif(1)), + simplify = FALSE + )) } + # New outcome variable -y_cl <- x + 0.5*x2 + id.eff[id] + firm.eff[firm] + cl_noise(cl1) + cl_noise(cl2) +d$y_cl <- d$x1 + 0.5 * d$x2 + id.eff[d$id] + firm.eff[d$firm] + + cl_noise(d$cl1) 
+ cl_noise(d$cl2) ## Estimate and print the model with cluster-robust SEs (default) -est_cl <- felm(y_cl ~ x + x2 | id + firm | 0 | cl1 + cl2) +est_cl <- felm(y_cl ~ x1 + x2 | id + firm | 0 | cl1 + cl2, data = d) summary(est_cl) -\dontrun{ # Print ordinary standard errors: summary(est_cl, robust = FALSE) # Match cluster-robust SEs from Stata's reghdfe package: -summary(felm(y_cl ~ x + x2 | id + firm | 0 | cl1 + cl2, cmethod="reghdfe"))} +summary(felm(y_cl ~ x1 + x2 | id + firm | 0 | cl1 + cl2, + data = d, + cmethod = "reghdfe" +)) +## Restore default options options(oldopts) } @@ -370,14 +385,14 @@ options(oldopts) Cameron, A.C., J.B. Gelbach and D.L. Miller (2011) \cite{Robust inference with multiway clustering}, Journal of Business & Economic Statistics 29 (2011), no. 2, 238--249. -\url{http://dx.doi.org/10.1198/jbes.2010.07136} +\doi{10.1198/jbes.2010.07136} Kolesar, M., R. Chetty, J. Friedman, E. Glaeser, and G.W. Imbens (2014) \cite{Identification and Inference with Many Invalid Instruments}, Journal of Business & Economic Statistics (to appear). 
-\url{http://dx.doi.org/10.1080/07350015.2014.978175} +\doi{10.1080/07350015.2014.978175} } \seealso{ -\code{\link{getfe}} \code{\link{summary.felm}} -\code{\link{condfstat}} \code{\link{waldtest}} +\code{\link[=getfe]{getfe()}} \code{\link[=summary.felm]{summary.felm()}} +\code{\link[=condfstat]{condfstat()}} \code{\link[=waldtest]{waldtest()}} } diff --git a/man/fepois.Rd b/man/fepois.Rd new file mode 100644 index 0000000..93abe1a --- /dev/null +++ b/man/fepois.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feglm.R +\name{fepois} +\alias{fepois} +\title{Fit a Poisson model with multiple group fixed effects} +\usage{ +fepois( + formula, + data, + offset = NULL, + subset = NULL, + robust = TRUE, + cluster = NULL, + tol = 1e-10 +) +} +\arguments{ +\item{formula}{an object of class '"formula"' (or one that can be coerced to +that class): a symbolic description of the model to be fitted. Similarly to +'lm'. See Details.} + +\item{data}{a data frame containing the variables of the model.} + +\item{offset}{this can be used to specify an \emph{a priori} known component +to be included in the linear predictor during fitting. This should be +\code{NULL} or a numeric vector or matrix of extents matching those of the +response. One or more \code{\link{offset}} terms can be included in the +formula instead or as well, and if more than one are specified their sum is +used. See \code{\link{model.offset}}.} + +\item{subset}{an optional vector specifying a subset of observations to be +used in the fitting process.} + +\item{robust}{logical value to return a robust standard error computation.} + +\item{cluster}{optional variable to group by and compute sandwich-type +robust standard errors. 
Should be a formula of the form \code{~x_j} or
+an object that can be coerced to a formula.}
+
+\item{tol}{tolerance value for GLM convergence criteria.}
+}
+\description{
+Fit a Poisson model with multiple group fixed effects
+}
+\seealso{
+felm
+}
diff --git a/man/fevcov.Rd b/man/fevcov.Rd
index 08ecc4d..0fd6d5c 100644
--- a/man/fevcov.Rd
+++ b/man/fevcov.Rd
@@ -16,9 +16,9 @@ fevcov(
 }
 \arguments{
 \item{est}{an object of class '"felm"', the result of a call to
-\code{\link{felm}(keepX=TRUE)}.}
+\code{\link[=felm]{felm}(keepX=TRUE)}.}
 
-\item{alpha}{a data frame, the result of a call to \code{\link{getfe}}.}
+\item{alpha}{a data frame, the result of a call to \code{\link[=getfe]{getfe()}}.}
 
 \item{tol}{numeric. The absolute tolerance for the bias-corrected
 correlation.}
@@ -64,15 +64,15 @@ effects.
 
 \code{alpha} must contain a full set of coefficients, and contain columns
 \code{'fe'} and \code{'effect'} like the default estimable functions from
-\code{\link{efactory}}.
+\code{\link[=efactory]{efactory()}}.
 
-In the case that the \code{\link{felm}}-estimation has weights, it is the
+In the case that the \code{\link[=felm]{felm()}}-estimation has weights, it is the
 weighted variances and covariance which are bias corrected.
 }
 \note{
 Bias correction for IV-estimates are not supported as of now.
 
-Note that if \code{est} is the result of a call to \code{\link{felm}} with
+Note that if \code{est} is the result of a call to \code{\link[=felm]{felm()}} with
 \code{keepX=FALSE} (the default), the biases will be computed as if the
 covariates X are independent of the factors. This will be faster (typically
 by a factor of approx. 4), and possibly wronger. Note also that the
@@ -81,7 +81,7 @@ some time. It would be wise to start out with quite liberal tolerances,
 e.g. \cite{tol=0.1}, to get an idea of the time requirements.
 
 If there are only two fixed effects, \code{fevcov} returns the same
-information as \code{\link{bccorr}}, though in a slightly different format.
+information as \code{\link[=bccorr]{bccorr()}}, though in a slightly different format. } \examples{ @@ -89,39 +89,39 @@ x <- rnorm(5000) x2 <- rnorm(length(x)) ## create individual and firm -id <- factor(sample(40,length(x),replace=TRUE)) -firm <- factor(sample(30,length(x),replace=TRUE,prob=c(2,rep(1,29)))) -foo <- factor(sample(20,length(x),replace=TRUE)) +id <- factor(sample(40, length(x), replace = TRUE)) +firm <- factor(sample(30, length(x), replace = TRUE, prob = c(2, rep(1, 29)))) +foo <- factor(sample(20, length(x), replace = TRUE)) ## effects id.eff <- rnorm(nlevels(id)) firm.eff <- runif(nlevels(firm)) -foo.eff <- rchisq(nlevels(foo),df=1) +foo.eff <- rchisq(nlevels(foo), df = 1) ## left hand side id.m <- id.eff[id] firm.m <- firm.eff[firm] foo.m <- foo.eff[foo] # normalize them -id.m <- id.m/sd(id.m) -firm.m <- firm.m/sd(firm.m) -foo.m <- foo.m/sd(foo.m) -y <- x + 0.25*x2 + id.m + firm.m + foo.m + rnorm(length(x),sd=2) -z <- x + 0.5*x2 + 0.7*id.m + 0.5*firm.m + 0.3*foo.m + rnorm(length(x),sd=2) +id.m <- id.m / sd(id.m) +firm.m <- firm.m / sd(firm.m) +foo.m <- foo.m / sd(foo.m) +y <- x + 0.25 * x2 + id.m + firm.m + foo.m + rnorm(length(x), sd = 2) +z <- x + 0.5 * x2 + 0.7 * id.m + 0.5 * firm.m + 0.3 * foo.m + rnorm(length(x), sd = 2) # make a data frame -fr <- data.frame(y,z,x,x2,id,firm,foo) +fr <- data.frame(y, z, x, x2, id, firm, foo) ## estimate and print result -est <- felm(y|z ~ x+x2|id+firm+foo, data=fr, keepX=TRUE) +est <- felm(y | z ~ x + x2 | id + firm + foo, data = fr, keepX = TRUE) # find bias corrections, there's little bias in this example -print(yv <- fevcov(est, lhs='y')) +print(yv <- fevcov(est, lhs = "y")) ## Here's how to compute the unbiased correlation matrix: cm <- cov2cor(yv) -structure(cm,bias=NULL) +structure(cm, bias = NULL) } \references{ Gaure, S. (2014), \cite{Correlation bias correction in two-way fixed-effects linear regression}, Stat 3(1):379-390, 2014. 
-\url{http://dx.doi.org/10.1002/sta4.68} +\doi{10.1002/sta4.68} } \seealso{ -\code{\link{varvars}} \code{\link{bccorr}} +\code{\link[=varvars]{varvars()}} \code{\link[=bccorr]{bccorr()}} } diff --git a/man/figures/README-pressure-1.png b/man/figures/README-pressure-1.png new file mode 100644 index 0000000..148a276 Binary files /dev/null and b/man/figures/README-pressure-1.png differ diff --git a/man/fixedse.Rd b/man/fixedse.Rd index 1200145..cb85343 100644 --- a/man/fixedse.Rd +++ b/man/fixedse.Rd @@ -7,7 +7,7 @@ fixedse(est, lhs = NULL, E) } \arguments{ -\item{est}{'felm' object. The result of a call to \code{\link{felm}}.} +\item{est}{'felm' object. The result of a call to \code{\link[=felm]{felm()}}.} \item{lhs}{character. Name of the left hand side, if more than one.} @@ -18,20 +18,20 @@ numeric. Vector of standard errors. } \description{ fixedse computes the standard errors for the fixed effects when there is only one. -While \code{\link{getfe}} can provide standard errors, it does so by bootstrapping +While \code{\link[=getfe]{getfe()}} can provide standard errors, it does so by bootstrapping for general estimable functions. In the special case that there's only a single fixed effect, and the estimable function is just the levels, this function can be used to -compute the fixed effects without bootstrapping. It requires that \code{\link{felm}} +compute the fixed effects without bootstrapping. It requires that \code{\link[=felm]{felm()}} is run with keepX=TRUE. 
} \examples{ x <- rnorm(1000) -f <- factor(sample(5,1000,replace=TRUE)) +f <- factor(sample(5, 1000, replace = TRUE)) y <- x + (1:5)[f] + rnorm(1000) -est <- felm(y ~ x | f, keepX=TRUE) -#both bootstrap and computed se: -cbind(getfe(est,ef=efactory(est,'ref'),se=TRUE), fse=fixedse(est)) -#compare with lm: -summary(lm(y ~x+f-1)) +est <- felm(y ~ x | f, keepX = TRUE) +# both bootstrap and computed se: +cbind(getfe(est, ef = efactory(est, "ref"), se = TRUE), fse = fixedse(est)) +# compare with lm: +summary(lm(y ~ x + f - 1)) } \keyword{internal} diff --git a/man/getfe.Rd b/man/getfe.Rd index bbddb3c..bc36ddb 100644 --- a/man/getfe.Rd +++ b/man/getfe.Rd @@ -18,7 +18,7 @@ getfe( } \arguments{ \item{obj}{object of class \code{"felm"}, usually, a result of a call to -\code{\link{felm}}} +\code{\link[=felm]{felm()}}} \item{references}{a vector of strings. If there are more than two factors and you have prior knowledge of what the reference levels should be like @@ -30,18 +30,18 @@ it as FALSE unless absolutely needed.} \item{method}{character string. Either 'cholesky', 'cg', or the default 'kaczmarz'. The latter is often very fast and consumes little memory, it -requires an estimable function to be specified, see \code{\link{efactory}}. +requires an estimable function to be specified, see \code{\link[=efactory]{efactory()}}. The 'cholesky' method is no longer maintained as the author sees no use for it.} \item{ef}{function. A function of two variables, a vector of group fixed -effects and a logical, i.e. \code{function(v,addnames)}. This function +effects and a logical, i.e. \verb{function(v,addnames)}. This function should be estimable and is used to transform the raw-coefficients \code{v} from the kaczmarz-method. The second variable indicates whether the function must return a named vector (if this is FALSE, one may skip the names, saving memory allocations and time). -If a string is specified, it is fed to the \code{\link{efactory}}-function. 
+If a string is specified, it is fed to the \code{\link[=efactory]{efactory()}}-function. The default function is one which picks one reference in each component. Can be set to \code{ef="ln"} to yield the minimal-norm solution from the @@ -76,11 +76,11 @@ function. } \description{ Compute the group fixed effects, i.e. the dummy parameters, which were swept -out during an estimation with \code{\link{felm}}. +out during an estimation with \code{\link[=felm]{felm()}}. } \details{ For the case with two factors (the terms in the second part of the formula -supplied to \code{\link{felm}}), one reference in each connected component +supplied to \code{\link[=felm]{felm()}}), one reference in each connected component is adequate when interpreting the results. For three or more factors, no such easy method is known; for the @@ -95,7 +95,7 @@ report connected components. In this case, it is not known which graph theoretic concept may be used to analyze the rank-deficiency. The standard errors returned by the Kaczmarz-method are bootstrapped, -keeping the other coefficients (from \code{\link{felm}}) constant, i.e. they +keeping the other coefficients (from \code{\link[=felm]{felm()}}) constant, i.e. they are from the variance when resampling the residuals. If \code{robust=TRUE}, heteroskedastic robust standard errors are estimated. If \code{robust=FALSE} and \code{cluster=TRUE}, clustered standard errors with the cluster @@ -104,42 +104,45 @@ is used for the cluster definition. 
} \examples{ -oldopts <- options(lfe.threads=2) +oldopts <- options("lfe.threads") +options(lfe.threads = 2) ## create covariates x <- rnorm(4000) x2 <- rnorm(length(x)) ## create individual and firm -id <- factor(sample(500,length(x),replace=TRUE)) -firm <- factor(sample(300,length(x),replace=TRUE)) +id <- factor(sample(500, length(x), replace = TRUE)) +firm <- factor(sample(300, length(x), replace = TRUE)) ## effects id.eff <- rlnorm(nlevels(id)) firm.eff <- rexp(nlevels(firm)) ## left hand side -y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) +y <- x + 0.25 * x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) ## estimate and print result -est <- felm(y ~ x+x2 | id + firm) +est <- felm(y ~ x + x2 | id + firm) summary(est) ## extract the group effects -alpha <- getfe(est,se=TRUE) +alpha <- getfe(est, se = TRUE) ## find some estimable functions, with standard errors, we don't get ## names so we must precompute some numerical indices in ef -idx <- match(c('id.5','id.6','firm.11','firm.12'),rownames(alpha)) -alpha[idx,] -ef <- function(v,addnames) { - w <- c(v[idx[[2]]]-v[idx[[1]]],v[idx[[4]]]+v[idx[[1]]], - v[idx[[4]]]-v[idx[[3]]]) - if(addnames) names(w) <-c('id6-id5','f12+id5','f12-f11') +idx <- match(c("id.5", "id.6", "firm.11", "firm.12"), rownames(alpha)) +alpha[idx, ] +ef <- function(v, addnames) { + w <- c( + v[idx[[2]]] - v[idx[[1]]], v[idx[[4]]] + v[idx[[1]]], + v[idx[[4]]] - v[idx[[3]]] + ) + if (addnames) names(w) <- c("id6-id5", "f12+id5", "f12-f11") w } -getfe(est,ef=ef,se=TRUE) +getfe(est, ef = ef, se = TRUE) options(oldopts) \dontrun{ -summary(lm(y ~ x+x2+id+firm-1)) +summary(lm(y ~ x + x2 + id + firm - 1)) } } diff --git a/man/is.estimable.Rd b/man/is.estimable.Rd index 567a702..45f5a43 100644 --- a/man/is.estimable.Rd +++ b/man/is.estimable.Rd @@ -33,11 +33,11 @@ the Kaczmarz method.} Returns a logical. 
} \description{ -Verify that a function you have written for \code{\link{getfe}} is indeed +Verify that a function you have written for \code{\link[=getfe]{getfe()}} is indeed estimable. } \details{ -When writing custom estimable functions for \code{\link{getfe}}, the +When writing custom estimable functions for \code{\link[=getfe]{getfe()}}, the function \code{is.estimable} can be used to test it for estimability. \code{is.estimable()} solves the sparse residual system with the Kaczmarz method, using two different initial values. Then \code{ef()} is applied to @@ -51,31 +51,32 @@ estimable, whereas the others are not. } \examples{ -oldopts <- options(lfe.threads=1) +oldopts <- options("lfe.threads") +options(lfe.threads = 2) ## create individual and firm -id <- factor(sample(5000,50000,replace=TRUE)) -firm <- factor(sample(3000,50000,replace=TRUE)) +id <- factor(sample(5000, 50000, replace = TRUE)) +firm <- factor(sample(3000, 50000, replace = TRUE)) ## create some estimable functions. 
It's faster to ## use numerical indices in ef rather than strings, and the input v ## to ef has no names, we have to add them when requested -ef <- function(v,addnames) { - w <- c(v[6]-v[5],v[7000]+v[5],v[7000]-v[6000]) - if(addnames) names(w) <-c('id6-id5','f2k+id5','f2k-f1k') +ef <- function(v, addnames) { + w <- c(v[6] - v[5], v[7000] + v[5], v[7000] - v[6000]) + if (addnames) names(w) <- c("id6-id5", "f2k+id5", "f2k-f1k") w } -is.estimable(ef,list(id=id,firm=firm)) +is.estimable(ef, list(id = id, firm = firm)) ## Then make an error; in the last coordinate, sum two firms -ef <- function(v,addnames) { - w <- c(v[6]-v[5],v[7000]+v[5],v[7000]+v[6000]) - if(addnames) names(w) <-c('id6-id5','f2k+id5','f2k-f1k') +ef <- function(v, addnames) { + w <- c(v[6] - v[5], v[7000] + v[5], v[7000] + v[6000]) + if (addnames) names(w) <- c("id6-id5", "f2k+id5", "f2k-f1k") w } -is.estimable(ef, list(id=id,firm=firm), keepdiff=TRUE) +is.estimable(ef, list(id = id, firm = firm), keepdiff = TRUE) options(oldopts) } \seealso{ -\code{\link{getfe}} +\code{\link[=getfe]{getfe()}} } diff --git a/man/kaczmarz.Rd b/man/kaczmarz.Rd index 9eba32d..9590459 100644 --- a/man/kaczmarz.Rd +++ b/man/kaczmarz.Rd @@ -39,38 +39,38 @@ Uses the Kaczmarz method to solve a system of the type Dx = R, where D is the matrix of dummies created from a list of factors. } \note{ -This function is used by \code{\link{getfe}}, it's quite specialized, +This function is used by \code{\link[=getfe]{getfe()}}, it's quite specialized, but it might be useful for other purposes too. In case of convergence problems, setting \code{options(lfe.usecg=TRUE)} will cause the kaczmarz() function to dispatch to the more general conjugate -gradient method of \code{\link{cgsolve}}. This may or may not be faster. +gradient method of \code{\link[=cgsolve]{cgsolve()}}. This may or may not be faster. 
} \examples{ ## create factors - f1 <- factor(sample(24000,100000,replace=TRUE)) - f2 <- factor(sample(20000,length(f1),replace=TRUE)) - f3 <- factor(sample(10000,length(f1),replace=TRUE)) - f4 <- factor(sample(8000,length(f1),replace=TRUE)) +f1 <- factor(sample(24000, 100000, replace = TRUE)) +f2 <- factor(sample(20000, length(f1), replace = TRUE)) +f3 <- factor(sample(10000, length(f1), replace = TRUE)) +f4 <- factor(sample(8000, length(f1), replace = TRUE)) ## the matrix of dummies - D <- makeDmatrix(list(f1,f2,f3,f4)) - dim(D) +D <- makeDmatrix(list(f1, f2, f3, f4)) +dim(D) ## an x - truex <- runif(ncol(D)) +truex <- runif(ncol(D)) ## and the right hand side - R <- as.vector(D \%*\% truex) +R <- as.vector(D \%*\% truex) ## solve it - sol <- kaczmarz(list(f1,f2,f3,f4),R) +sol <- kaczmarz(list(f1, f2, f3, f4), R) ## verify that the solution solves the system Dx = R - sqrt(sum((D \%*\% sol - R)^2)) +sqrt(sum((D \%*\% sol - R)^2)) ## but the solution is not equal to the true x, because the system is ## underdetermined - sqrt(sum((sol - truex)^2)) +sqrt(sum((sol - truex)^2)) ## moreover, the solution from kaczmarz has smaller norm - sqrt(sum(sol^2)) < sqrt(sum(truex^2)) +sqrt(sum(sol^2)) < sqrt(sum(truex^2)) } \seealso{ -\code{\link{cgsolve}} +\code{\link[=cgsolve]{cgsolve()}} } diff --git a/man/lfe-package.Rd b/man/lfe-package.Rd index d743227..071e823 100644 --- a/man/lfe-package.Rd +++ b/man/lfe-package.Rd @@ -16,7 +16,7 @@ limited mobility bias is also included. \details{ This package is intended for linear models with multiple group fixed effects, i.e. with 2 or more factors with a large number of levels. It -performs similar functions as \code{\link[stats]{lm}}, but it uses a special +performs similar functions as \code{\link[stats:lm]{stats::lm()}}, but it uses a special method for projecting out multiple group fixed effects from the normal equations, hence it is faster. It is a generalization of the within estimator. 
This may be required if the groups have high cardinality (many @@ -27,8 +27,8 @@ errors for the group effects by bootstrapping, but this is a very time- and memory-consuming process compared to finding the point estimates. If you only have a single huge factor, the package \pkg{plm} is probably better suited. If your factors don't have thousands of levels, -\code{\link[stats]{lm}} or other packages are probably better suited. -\pkg{lfe} is designed to produce the same results as \code{\link[stats]{lm}} +\code{\link[stats:lm]{stats::lm()}} or other packages are probably better suited. +\pkg{lfe} is designed to produce the same results as \code{\link[stats:lm]{stats::lm()}} will do if run with the full set of dummies. Projecting out interactions between continuous covariates and factors is @@ -39,28 +39,28 @@ The package does not support non-linear models. For GLMs with many dummies there is a package \pkg{alpaca} which uses similar methods to project them out. The estimation is done in two steps. First the other coefficients are -estimated with the function \code{\link{felm}} by centering on all the group +estimated with the function \code{\link[=felm]{felm()}} by centering on all the group means, followed by an OLS (similar to lm). Then the group effects are -extracted (if needed) with the function \code{\link{getfe}}. This method is +extracted (if needed) with the function \code{\link[=getfe]{getfe()}}. This method is described by \cite{Gaure (2013)}, but also appears in \cite{Guimaraes and Portugal (2010)}, disguised as the Gauss-Seidel algorithm. 
-There's also a function \code{\link{demeanlist}} which just does the +There's also a function \code{\link[=demeanlist]{demeanlist()}} which just does the centering on an arbitrary matrix or data frame, and there's a function -\code{\link{compfactor}} which computes the connected components which are +\code{\link[=compfactor]{compfactor()}} which computes the connected components which are used for interpreting the group effects when there are only two factors (see -the Abowd et al references), they are also returned by \code{\link{getfe}}. +the Abowd et al references), they are also returned by \code{\link[=getfe]{getfe()}}. For those who study the correlation between the fixed effects, like in -\cite{Abowd et al. (1999)}, there are functions \code{\link{bccorr}} and -\code{\link{fevcov}} for computing limited mobility bias corrected +\cite{Abowd et al. (1999)}, there are functions \code{\link[=bccorr]{bccorr()}} and +\code{\link[=fevcov]{fevcov()}} for computing limited mobility bias corrected correlations and variances with the method described in \cite{Gaure (2014b)}. Instrumental variable estimations are supported with 2SLS. Conditional F statistics for testing reduced rank weak instruments as in \cite{Sanderson -and Windmeijer (2015)} are available in \code{\link{condfstat}}. Joint -signficance testing of coefficients is available in \code{\link{waldtest}}. +and Windmeijer (2015)} are available in \code{\link[=condfstat]{condfstat()}}. Joint +significance testing of coefficients is available in \code{\link[=waldtest]{waldtest()}}. The centering on the means is done with a tolerance which is set by \code{options(lfe.eps=1e-8)} (the default). This is a somewhat conservative @@ -74,7 +74,7 @@ of threads is fetched upon loading the package from the environment variable \env{LFE_THREADS}, \env{OMP_THREAD_LIMIT}, \env{OMP_NUM_THREADS} or \env{NUMBER_OF_PROCESSORS} (for Windows), and stored by \code{options(lfe.threads=n)}. 
This option can be changed prior to calling -\code{\link{felm}}, if so desired. Note that, typically, \pkg{lfe} is +\code{\link[=felm]{felm()}}, if so desired. Note that, typically, \pkg{lfe} is limited by memory bandwidth, not cpu speed, thus fast memory and large cache is more important than clock frequency. It is therefore also not always true that running on all available cores is much better than running on half of @@ -91,7 +91,7 @@ interval in seconds is \code{options(lfe.pint=1800)}. The package has been tested on datasets with approx 20,000,000 observations with 15 covariates and approx 2,300,000 and 270,000 group levels (the -\code{\link{felm}} took about 50 minutes on 8 cpus, the \code{\link{getfe}} +\code{\link[=felm]{felm()}} took about 50 minutes on 8 cpus, the \code{\link[=getfe]{getfe()}} takes 5 minutes). Though, beware that not only the size of the dataset matters, but also its structure, as demonstrated by \cite{Gaure (2014a)}. @@ -112,30 +112,31 @@ the Stata module \code{reghdfe}. 
} \examples{ - oldopts <- options(lfe.threads=1) - x <- rnorm(1000) - x2 <- rnorm(length(x)) - id <- factor(sample(10,length(x),replace=TRUE)) - firm <- factor(sample(3,length(x),replace=TRUE,prob=c(2,1.5,1))) - year <- factor(sample(10,length(x),replace=TRUE,prob=c(2,1.5,rep(1,8)))) - id.eff <- rnorm(nlevels(id)) - firm.eff <- rnorm(nlevels(firm)) - year.eff <- rnorm(nlevels(year)) - y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + - year.eff[year] + rnorm(length(x)) - est <- felm(y ~ x+x2 | id + firm + year) - summary(est) - - getfe(est,se=TRUE) +oldopts <- options("lfe.threads") +options(lfe.threads = 2) +x <- rnorm(1000) +x2 <- rnorm(length(x)) +id <- factor(sample(10, length(x), replace = TRUE)) +firm <- factor(sample(3, length(x), replace = TRUE, prob = c(2, 1.5, 1))) +year <- factor(sample(10, length(x), replace = TRUE, prob = c(2, 1.5, rep(1, 8)))) +id.eff <- rnorm(nlevels(id)) +firm.eff <- rnorm(nlevels(firm)) +year.eff <- rnorm(nlevels(year)) +y <- x + 0.25 * x2 + id.eff[id] + firm.eff[firm] + + year.eff[year] + rnorm(length(x)) +est <- felm(y ~ x + x2 | id + firm + year) +summary(est) + +getfe(est, se = TRUE) # compare with an ordinary lm - summary(lm(y ~ x+x2+id+firm+year-1)) - options(oldopts) +summary(lm(y ~ x + x2 + id + firm + year - 1)) +options(oldopts) } \references{ Abowd, J.M., F. Kramarz and D.N. Margolis (1999) \cite{High Wage Workers and High Wage Firms}, Econometrica 67 (1999), no. 2, 251--333. -\url{http://dx.doi.org/10.1111/1468-0262.00020} +\doi{10.1111/1468-0262.00020} Abowd, J.M., R. Creecy and F. Kramarz (2002) \cite{Computing Person and Firm Effects Using Linked Longitudinal Employer-Employee Data.} Technical Report @@ -145,25 +146,25 @@ TP-2002-06, U.S. Census Bureau. Andrews, M., L. Gill, T. Schank and R. Upward (2008) \cite{High wage workers and low wage firms: negative assortative matching or limited mobility bias?} J.R. Stat. Soc.(A) 171(3), 673--697. 
-\url{http://dx.doi.org/10.1111/j.1467-985X.2007.00533.x} +\doi{10.1111/j.1467-985X.2007.00533.x} Cornelissen, T. (2008) \cite{The stata command felsdvreg to fit a linear model with two high-dimensional fixed effects.} Stata Journal, 8(2):170--189, 2008. -\url{http://econpapers.repec.org/RePEc:tsj:stataj:v:8:y:2008:i:2:p:170-189} +\url{https://econpapers.repec.org/RePEc:tsj:stataj:v:8:y:2008:i:2:p:170-189} Correia, S. (2014) \cite{REGHDFE: Stata module to perform linear or instrumental-variable regression absorbing any number of high-dimensional fixed effects}, Statistical Software Components, Boston College Department -of Economics. \url{http://econpapers.repec.org/RePEc:boc:bocode:s457874} +of Economics. \url{https://econpapers.repec.org/RePEc:boc:bocode:s457874} Croissant, Y. and G. Millo (2008) \cite{Panel Data Econometrics in R: The plm Package}, Journal of Statistical Software, 27(2). -\url{http://www.jstatsoft.org/v27/i02/} +\url{https://www.jstatsoft.org/v27/i02/} Gaure, S. (2013) \cite{OLS with Multiple High Dimensional Category Variables.} Computational Statistics and Data Analysis, 66:8--18, 2013 -\url{http://dx.doi.org/10.1016/j.csda.2013.03.024} +\doi{10.1016/j.csda.2013.03.024} Gaure, S. (2014a) \cite{lfe: Linear Group Fixed Effects.} The R Journal, 5(2):104-117, Dec 2013. @@ -171,22 +172,22 @@ Gaure, S. (2014a) \cite{lfe: Linear Group Fixed Effects.} The R Journal, Gaure, S. (2014b), \cite{Correlation bias correction in two-way fixed-effects linear regression}, Stat 3(1):379-390, 2014. -\url{http://dx.doi.org/10.1002/sta4.68} +\doi{10.1002/sta4.68} Guimaraes, P. and Portugal, P. (2010) \cite{A simple feasible procedure to fit models with high-dimensional fixed effects.} The Stata Journal, 10(4):629--649, 2010. -\url{http://www.stata-journal.com/article.html?article=st0212} +\url{https://www.stata-journal.com/article.html?article=st0212} Ouazad, A. 
(2008) \cite{A2REG: Stata module to estimate models with two fixed effects.} Statistical Software Components S456942, Boston College Department of Economics. -\url{http://ideas.repec.org/c/boc/bocode/s456942.html} +\url{https://ideas.repec.org/c/boc/bocode/s456942.html} Sanderson, E. and F. Windmeijer (2014) \cite{A weak instrument F-test in linear IV models with multiple endogenous variables}, Journal of Econometrics, 2015. -\url{http://www.sciencedirect.com/science/article/pii/S0304407615001736} +\url{https://www.sciencedirect.com/science/article/pii/S0304407615001736} } \keyword{models} \keyword{regression} diff --git a/man/makeDmatrix.Rd b/man/makeDmatrix.Rd index 8c7b7d0..38b2f4d 100644 --- a/man/makeDmatrix.Rd +++ b/man/makeDmatrix.Rd @@ -24,8 +24,8 @@ documentation for illustrative purposes. } \examples{ - fl <- lapply(1:3, function(i) factor(sample(3,10,replace=TRUE))) - fl - makeDmatrix(fl, weights=seq(0.1,1,0.1)) +fl <- lapply(1:3, function(i) factor(sample(3, 10, replace = TRUE))) +fl +makeDmatrix(fl, weights = seq(0.1, 1, 0.1)) } diff --git a/man/mctrace.Rd b/man/mctrace.Rd index ffdeed9..cd3e48a 100644 --- a/man/mctrace.Rd +++ b/man/mctrace.Rd @@ -32,8 +32,8 @@ product fairly easy, and this is utilized to estimate the trace of the matrix. } \details{ -\code{mctrace} is used internally by \code{\link{fevcov}} and -\code{\link{bccorr}}, but has been made public since it might be useful for +\code{mctrace} is used internally by \code{\link[=fevcov]{fevcov()}} and +\code{\link[=bccorr]{bccorr()}}, but has been made public since it might be useful for other tasks as well. For any matrix \eqn{A}, the trace equals the sum of the diagonal elements, @@ -41,7 +41,7 @@ or the sum of the eigenvalues. However, if the size of the matrix is very large, we may not have a matrix representation, so the diagonal is not immediately available. 
In that case we can use the formula \eqn{tr(A) = E(x^t A x)}{tr(A) = E(x'Ax)} where \eqn{x} is a random vector with zero -expectation and \eqn{Var(x) = I}. We estimate the expecation with sample +expectation and \eqn{Var(x) = I}. We estimate the expectation with sample means. \code{mctrace} draws \eqn{x} in \eqn{\{-1,1\}^N}{{-1,1}}, and evaluates \code{mat} on these vectors. @@ -67,18 +67,18 @@ current estimate of the trace (i.e. the current sample mean). } \examples{ - A <- matrix(rnorm(25),5) - fun <- function(x) A \%*\% x - sum(diag(A)) - sum(eigen(A,only.values=TRUE)$values) - # mctrace is not really useful for small problems. - mctrace(fun,ncol(A),tol=0.05) - # try a larger problem (3000x3000): - f1 <- factor(sample(1500,3000,replace=TRUE)) - f2 <- factor(sample(1500,3000,replace=TRUE)) - fl <- list(f1,f2) - mctrace(fl,tol=-5) - # exact: - length(f1) - nlevels(f1) - nlevels(f2) + nlevels(compfactor(fl)) +A <- matrix(rnorm(25), 5) +fun <- function(x) A \%*\% x +sum(diag(A)) +sum(eigen(A, only.values = TRUE)$values) +# mctrace is not really useful for small problems. +mctrace(fun, ncol(A), tol = 0.05) +# try a larger problem (3000x3000): +f1 <- factor(sample(1500, 3000, replace = TRUE)) +f2 <- factor(sample(1500, 3000, replace = TRUE)) +fl <- list(f1, f2) +mctrace(fl, tol = -5) +# exact: +length(f1) - nlevels(f1) - nlevels(f2) + nlevels(compfactor(fl)) } diff --git a/man/nlexpect.Rd b/man/nlexpect.Rd index 3313050..8787d0e 100644 --- a/man/nlexpect.Rd +++ b/man/nlexpect.Rd @@ -21,7 +21,7 @@ nlexpect( } \arguments{ \item{est}{object of class \code{"felm"} or \code{"lm"}, a result of a call to -\code{\link{felm}} or \code{lm}.} +\code{\link[=felm]{felm()}} or \code{lm}.} \item{fun}{function of coefficients to be integrated. Can also be a \code{quote}d expression.} @@ -46,7 +46,7 @@ package \pkg{R2Cuba} disappeared.} \item{max.eval}{integer. Maximum number of integral evaluations.} -\item{method}{character. 
A method specification usable by \code{cubature::cubintegrate}. +\item{method}{character. A method specification usable by \code{cubature::cubintegrate}. The documentation there says that \code{"pcubature"} is good for smooth integrands of low dimensions.} \item{vectorize}{logical or numeric. Use vectorized function evaluation from package @@ -77,7 +77,7 @@ The list of coefficients used by \code{fun} must be specified in If the function is simple, it can be specified as a quoted expression like \code{quote(a*b+log(abs(d)))}. In this case, if \code{coefs} is not -specified, it will be set to the list of all the variables occuring in the +specified, it will be set to the list of all the variables occurring in the expression which are also names of coefficients. \code{fun} may return a vector of values, in which case a vector of @@ -87,15 +87,14 @@ quite compute intensive 4-dimensional integral will be computed, compared to two cheap 2-dimensional integrals if you do them separately. There is nothing to gain from using vector-valued functions compared to multiple calls to \code{nlexpect()}. -You may of course also integrate inequalites like \code{quote(abs(x1-0.2) > -0.2)} to simulate the probability from t-tests or Wald-tests. See the +You may of course also integrate inequalities like \code{quote(abs(x1-0.2) > 0.2)} to simulate the probability from t-tests or Wald-tests. See the examples. The function you provide will get an argument \code{...} if it does not have one already. It will also be passed an argument \code{.z} which contains the actual coefficients in normalized coordinates, i.e. if \code{ch} is the Cholesky decomposition of the covariance matrix, and \code{pt} are the point -estimates, the coefficients will be \code{pt + ch \%*\% .z}. The first argument +estimates, the coefficients will be \verb{pt + ch \\\%*\\\% .z}. The first argument is a vector with names corresponding to the coefficients. 
If you specify \code{vectorized=TRUE}, your function will be passed a list with vectors @@ -116,60 +115,60 @@ routine \code{cubature::cubintegrate} and the function to be integrated. } \note{ An alternative to this method is to use the \code{bootexpr} argument -with \code{\link{felm}}, to do a Monte Carlo integration. +with \code{\link[=felm]{felm()}}, to do a Monte Carlo integration. } \examples{ N <- 100 x1 <- rnorm(N) # make some correlation -x2 <- 0.1*rnorm(N) + 0.1*x1 -y <- 0.1*x1 + x2 + rnorm(N) +x2 <- 0.1 * rnorm(N) + 0.1 * x1 +y <- 0.1 * x1 + x2 + rnorm(N) summary(est <- felm(y ~ x1 + x2)) -pt1 <- coef(est)['x1'] -pt2 <- coef(est)['x2'] +pt1 <- coef(est)["x1"] +pt2 <- coef(est)["x2"] # expected values of coefficients, should match the summary # and variance, i.e. square of standard errors in the summary -nlexpect(est, quote(c(x1=x1,x2=x2,var=c((x1-pt1)^2,(x2-pt2)^2)))) +nlexpect(est, quote(c(x1 = x1, x2 = x2, var = c((x1 - pt1)^2, (x2 - pt2)^2)))) \donttest{ # the covariance matrix: -nlexpect(est, tcrossprod(as.matrix(c(x1-pt1,x2-pt2)))) +nlexpect(est, tcrossprod(as.matrix(c(x1 - pt1, x2 - pt2)))) } -#Wald test of single variable -waldtest(est, ~x1)['p.F'] +# Wald test of single variable +waldtest(est, ~x1)["p.F"] # the same with nlexpect, i.e. probability for observing abs(x1)>abs(pt1) conditional # on E(x1) = 0. -nlexpect(est, (x1-pt1)^2 > pt1^2, tol=1e-7, vectorize=TRUE) +nlexpect(est, (x1 - pt1)^2 > pt1^2, tol = 1e-7, vectorize = TRUE) # which is the same as -2*nlexpect(est, x1*sign(pt1) < 0) +2 * nlexpect(est, x1 * sign(pt1) < 0) # Here's a multivalued, vectorized example -nlexpect(est, rbind(a=x1*x2 < pt1, b=x1*x2 > 0), vectorize=TRUE, method='divonne') +nlexpect(est, rbind(a = x1 * x2 < pt1, b = x1 * x2 > 0), vectorize = TRUE, method = "divonne") \donttest{ # Non-linear test: # A simple one, what's the probability that product x1*x2 is between 0 and |E(x1)|? 
-nlexpect(est, x1*x2 > 0 & x1*x2 < abs(pt1), vectorize=TRUE, method='divonne') +nlexpect(est, x1 * x2 > 0 & x1 * x2 < abs(pt1), vectorize = TRUE, method = "divonne") # Then a more complicated one with the expected value of a polynomal in the coefficients -f <- function(x) c(poly=x[['x1']]*(6*x[['x1']]-x[['x2']]^2)) +f <- function(x) c(poly = x[["x1"]] * (6 * x[["x1"]] - x[["x2"]]^2)) # This is the linearized test: -waldtest(est, f)['p.F'] +waldtest(est, f)["p.F"] # In general, for a function f, the non-linear Wald test is something like # the following: # expected value of function -Ef <- nlexpect(est, f, coefs=c('x1','x2')) +Ef <- nlexpect(est, f, coefs = c("x1", "x2")) # point value of function -Pf <- f(c(pt1,pt2)) +Pf <- f(c(pt1, pt2)) # similar to a Wald test, but non-linear: -nlexpect(est, function(x) (f(x)-Ef)^2 > Pf^2, c('x1','x2'), vectorize=TRUE) +nlexpect(est, function(x) (f(x) - Ef)^2 > Pf^2, c("x1", "x2"), vectorize = TRUE) # one-sided -nlexpect(est, function(x) f(x)-Ef > abs(Pf), c('x1','x2'), vectorize=TRUE) +nlexpect(est, function(x) f(x) - Ef > abs(Pf), c("x1", "x2"), vectorize = TRUE) # other sided -nlexpect(est, function(x) f(x)-Ef < -abs(Pf), c('x1','x2'), vectorize=TRUE) +nlexpect(est, function(x) f(x) - Ef < -abs(Pf), c("x1", "x2"), vectorize = TRUE) } } \seealso{ -\code{\link{waldtest}} +\code{\link[=waldtest]{waldtest()}} } diff --git a/man/sargan.Rd b/man/sargan.Rd index b349918..4008fb0 100644 --- a/man/sargan.Rd +++ b/man/sargan.Rd @@ -7,11 +7,11 @@ sargan(object, ..., lhs = object$lhs[1]) } \arguments{ -\item{object}{and object type '"felm"', the return value from \code{\link{felm}}.} +\item{object}{an object of type '"felm"', the return value from \code{\link[=felm]{felm()}}.} \item{...}{Not used at the moment.} -\item{lhs}{in case of multiple left hand sides, specify the name of the left +\item{lhs}{in case of multiple left hand sides, specify the name of the left hand side for which you want to compute Sargan's S.} } \value{ diff --git 
a/man/summary.felm.Rd b/man/summary.felm.Rd index af4441d..1371cd8 100644 --- a/man/summary.felm.Rd +++ b/man/summary.felm.Rd @@ -44,14 +44,14 @@ and corresponding (two-sided) p-value.} \item{fstat}{F-statistic.} \item{pval}{P-values.} \item{P.fstat}{Projected F-statistic. The result of a -call to \code{\link{waldtest}}} +call to \code{\link[=waldtest]{waldtest()}}} \item{fe}{list of factors. A list of the terms in the second part of the model.} \item{lhs.}{character. If \code{object} is the result of an estimation with multiple left hand sides, the actual argument \code{lhs} will be copied to this field.} \item{iv1fstat}{F-statistic for excluded instruments in 1. step IV, see -\code{\link{felm}}.} +\code{\link[=felm]{felm()}}.} \item{iv1pval}{P-value for \code{iv1fstat}.} } \description{ @@ -79,5 +79,5 @@ For a 1st stage IV-regression, an F-statistic against the model with excluded instruments is also computed. } \seealso{ -\code{\link{waldtest}} +\code{\link[=waldtest]{waldtest()}} } diff --git a/man/varvars.Rd b/man/varvars.Rd index a352173..557eaf9 100644 --- a/man/varvars.Rd +++ b/man/varvars.Rd @@ -8,9 +8,9 @@ varvars(est, alpha = getfe(est), tol = 0.01, biascorrect = FALSE, lhs = NULL) } \arguments{ \item{est}{an object of class '"felm"', the result of a call to -\code{\link{felm}(keepX=TRUE)}.} +\code{\link[=felm]{felm}(keepX=TRUE)}.} -\item{alpha}{a data frame, the result of a call to \code{\link{getfe}}.} +\item{alpha}{a data frame, the result of a call to \code{\link[=getfe]{getfe()}}.} \item{tol}{numeric. The absolute tolerance for the bias-corrected correlation.} @@ -22,7 +22,7 @@ correlation.} \value{ \code{varvars} returns a vector with a variance estimate for each fixed effect variance. I.e. for the diagonal returned by -\code{\link{fevcov}}. 
} \description{ Compute the variance of the fixed effect variance estimate @@ -31,21 +31,21 @@ Compute the variance of the fixed effect variance estimate With a model like \eqn{y = X\beta + D\theta + F\psi + \epsilon}, where \eqn{D} and \eqn{F} are matrices with dummy encoded factors, one application of \pkg{lfe} is to study the variances \eqn{var(D\theta)}, \eqn{var(F\psi)} and covariances -\eqn{cov(D\theta, F\psi)}. The function \code{\link{fevcov}} computes bias corrected +\eqn{cov(D\theta, F\psi)}. The function \code{\link[=fevcov]{fevcov()}} computes bias corrected variances and covariances. However, these variance estimates are still -random variables for which \code{\link{fevcov}} only estimate the +random variables for which \code{\link[=fevcov]{fevcov()}} only estimate the expectation. The function \code{varvars} estimates the variance of these estimates. This function returns valid results only for normally distributed residuals. Note that the estimates for the fixed effect variances from -\code{\link{fevcov}} are not normally distributed, but a sum of chi-square +\code{\link[=fevcov]{fevcov()}} are not normally distributed, but a sum of chi-square distributions which depends on the eigenvalues of certain large matrices. We do not compute that distribution. The variances returned by \code{varvars} can therefore \emph{not} be used directly to estimate confidence intervals, other than through coarse methods like the Chebyshev inequality. These estimates only serve as a rough guideline as to how wrong the variance -estimates from \code{\link{fevcov}} might be. +estimates from \code{\link[=fevcov]{fevcov()}} might be. Like the fixed effect variances themselves, their variances are also biased upwards. Correcting this bias can be costly, and is therefore by default @@ -56,11 +56,11 @@ large datasets they will be quite small. } \note{ The \code{tol} argument specifies the tolerance as in -\code{\link{fevcov}}. 
Note that if \code{est} is the result of a call to -\code{\link{felm}} with \code{keepX=FALSE} (the default), the variances will +\code{\link[=fevcov]{fevcov()}}. Note that if \code{est} is the result of a call to +\code{\link[=felm]{felm()}} with \code{keepX=FALSE} (the default), the variances will be estimated as if the covariates X are independent of the factors. There is currently no function available for estimating the variance of the -covariance estimates from \code{\link{fevcov}}. +covariance estimates from \code{\link[=fevcov]{fevcov()}}. The cited paper does not contain the expressions for the variances computed by \code{varvars} (there's a 10 page limit in that journal), though they can @@ -73,23 +73,23 @@ x <- rnorm(500) x2 <- rnorm(length(x)) ## create individual and firm -id <- factor(sample(40,length(x),replace=TRUE)) -firm <- factor(sample(30,length(x),replace=TRUE,prob=c(2,rep(1,29)))) -foo <- factor(sample(20,length(x),replace=TRUE)) +id <- factor(sample(40, length(x), replace = TRUE)) +firm <- factor(sample(30, length(x), replace = TRUE, prob = c(2, rep(1, 29)))) +foo <- factor(sample(20, length(x), replace = TRUE)) ## effects id.eff <- rnorm(nlevels(id)) firm.eff <- rnorm(nlevels(firm)) foo.eff <- rnorm(nlevels(foo)) ## left hand side id.m <- id.eff[id] -firm.m <- 2*firm.eff[firm] -foo.m <- 3*foo.eff[foo] -y <- x + 0.25*x2 + id.m + firm.m + foo.m + rnorm(length(x)) +firm.m <- 2 * firm.eff[firm] +foo.m <- 3 * foo.eff[foo] +y <- x + 0.25 * x2 + id.m + firm.m + foo.m + rnorm(length(x)) # make a data frame -fr <- data.frame(y,x,x2,id,firm,foo) +fr <- data.frame(y, x, x2, id, firm, foo) ## estimate and print result -est <- felm(y ~ x+x2|id+firm+foo, data=fr, keepX=TRUE) +est <- felm(y ~ x + x2 | id + firm + foo, data = fr, keepX = TRUE) alpha <- getfe(est) # estimate the covariance matrix of the fixed effects fevcov(est, alpha) @@ -102,5 +102,5 @@ Gaure, S. 
(2014), \cite{Correlation bias correction in two-way fixed-effects linear regression}, Stat 3(1):379-390, 2014. } \seealso{ -\code{\link{bccorr}} \code{\link{fevcov}} +\code{\link[=bccorr]{bccorr()}} \code{\link[=fevcov]{fevcov()}} } diff --git a/man/waldtest.Rd b/man/waldtest.Rd index af6043b..8e47800 100644 --- a/man/waldtest.Rd +++ b/man/waldtest.Rd @@ -16,7 +16,7 @@ waldtest( } \arguments{ \item{object}{object of class \code{"felm"}, a result of a call to -\code{\link{felm}}.} +\code{\link[=felm]{felm()}}.} \item{R}{matrix, character, formula, function, integer or logical. Specification of which exclusions to test.} @@ -65,31 +65,31 @@ If \code{R} is a function (of the coefficients), an approximate Wald test against H0: \code{R(beta) == 0}, using the Delta-method, is computed. In case of an IV-estimation, the names for the endogenous variables in -\code{coef(object)} are of the type \code{"`Q(fit)`"} which is a bit dull to +\code{coef(object)} are of the type \verb{"}Q(fit)\verb{"} which is a bit dull to type; if all the endogenous variables are to be tested they can be specified as \code{"endovars"}. It is also possible to specify an endogenous variable simply as \code{"Q"}, and \code{waldtest} will add the other syntactic sugar -to obtain \code{"`Q(fit)`"}. +to obtain \verb{"}Q(fit)\verb{"}. The \code{type} argument works as follows. If \code{type=='default'} it is assumed that the residuals are i.i.d., unless a cluster structure was -specified to \code{\link{felm}}. If \code{type=='robust'}, a heteroscedastic +specified to \code{\link[=felm]{felm()}}. If \code{type=='robust'}, a heteroscedastic structure is assumed, even if a cluster structure was specified in -\code{\link{felm}}. +\code{\link[=felm]{felm()}}. 
} \examples{ x <- rnorm(10000) x2 <- rnorm(length(x)) -y <- x - 0.2*x2 + rnorm(length(x)) -#Also works for lm -summary(est <- lm(y ~ x + x2 )) +y <- x - 0.2 * x2 + rnorm(length(x)) +# Also works for lm +summary(est <- lm(y ~ x + x2)) # We do not reject the true values -waldtest(est, ~ x-1|x2+0.2|`(Intercept)`) +waldtest(est, ~ x - 1 | x2 + 0.2 | `(Intercept)`) # The Delta-method coincides when the function is linear: waldtest(est, function(x) x - c(0, 1, -0.2)) } \seealso{ -\code{\link{nlexpect}} +\code{\link[=nlexpect]{nlexpect()}} } diff --git a/src/Crowsum.c b/src/Crowsum.c deleted file mode 100644 index bd8a60c..0000000 --- a/src/Crowsum.c +++ /dev/null @@ -1,69 +0,0 @@ -#include "lfe.h" -//#ifdef _OPENMP -//#include -//#endif -SEXP Crowsum(SEXP Rmat, SEXP Rfactor, SEXP Rmean) { - if(!IS_NUMERIC(Rmat)) error("Only numeric matrices accepted"); - if(!isInteger(Rfactor) && !isFactor(Rfactor)) error("Only factor or integer vector accepted"); - R_xlen_t len = xlength(Rmat); - mybigint_t cols = 0, rows=0; - int *f = INTEGER(Rfactor); - double *mat; - int nlev; - SEXP res; - double *mres; - int mean = INTEGER(AS_LOGICAL(Rmean))[0]; - int *table = NULL; - // int nthr = INTEGER(threads)[0]; - mat = REAL(Rmat); - if(isMatrix(Rmat)) { - cols = ncols(Rmat); - rows = nrows(Rmat); - } else { - cols = 1; - rows = len; - } - if(length(Rfactor) != rows) error("matrix/vector must have same length as factor"); - - nlev = nlevels(Rfactor); - - for(int i = 0; i < rows; i++) { - if(f[i] < 1 || ISNA(f[i])) error("Missing levels not supported"); - if(f[i] > nlev) error("Level for %d is %d, too large %d",i,f[i],nlev); - } - - if(mean) { - table = (int*) R_alloc(nlev,sizeof(int)); - for(int i = 0; i < nlev; i++) table[i] = 0; - for(int i = 0; i < rows; i++) table[f[i]-1]++; - } - - // Allocate resultant matrix - PROTECT(res = allocMatrix(REALSXP, nlev, cols)); - - SEXP dn; - SEXP rdn = GET_DIMNAMES(Rmat); - PROTECT(dn = allocVector(VECSXP,2)); - SET_VECTOR_ELT(dn,0,GET_LEVELS(Rfactor)); 
- if(!isNull(rdn)) SET_VECTOR_ELT(dn,1,VECTOR_ELT(rdn,1)); - SET_DIMNAMES(res,dn); - UNPROTECT(1); - - mres = REAL(res); - // Now, run through column by column, summing the levels - //#pragma omp parallel for num_threads(nthr) - memset(mres,0,nlev*cols*sizeof(double)); - mres--; // factor levels are 1-based - for(int k = 0; k < cols; k++,mres+=nlev) { - for(int i = 0; i < rows; i++) { - mres[f[i]] += *mat++; - } - } - if(mean) { - mres = REAL(res); - for(int k = 0; k < cols; k++, mres+=nlev) - for(int i = 0; i < nlev; i++) mres[i] /= table[i]; - } - UNPROTECT(1); - return(res); -} diff --git a/src/Makevars.in b/src/Makevars.in index 0626df4..4d4d2f8 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -1,3 +1,2 @@ -# $Id: Makevars.in 1748 2015-07-10 20:29:00Z sgaure $ PKG_LIBS=@PTHREAD_LIBS@ @PTHREAD_CFLAGS@ $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CFLAGS) PKG_CFLAGS=$(C_VISIBILITY) $(SHLIB_OPENMP_CFLAGS) @PTHREAD_CFLAGS@ @DFLAGS@ diff --git a/src/Makevars.win b/src/Makevars.win index 06850d8..f158c8d 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,5 +1,3 @@ PKG_LIBS=$(BLAS_LIBS) $(FLIBS) PKG_CFLAGS=$(C_VISIBILITY) -OBJECTS=lfe.o demean.o factor.o kaczmarz.o utils.o Crowsum.o - - +OBJECTS=lfe.o demean.o factor.o kaczmarz.o utils.o crowsum.o diff --git a/src/crowsum.c b/src/crowsum.c new file mode 100644 index 0000000..d630818 --- /dev/null +++ b/src/crowsum.c @@ -0,0 +1,87 @@ +#include "lfe.h" +// #ifdef _OPENMP +// #include +// #endif +SEXP crowsum(SEXP Rmat, SEXP Rfactor, SEXP Rmean) +{ + if (!IS_NUMERIC(Rmat)) + error("Only numeric matrices accepted"); + if (!isInteger(Rfactor) && !isFactor(Rfactor)) + error("Only factor or integer vector accepted"); + R_xlen_t len = xlength(Rmat); + mybigint_t cols = 0, rows = 0; + int *f = INTEGER(Rfactor); + double *mat; + int nlev; + SEXP res; + double *mres; + int mean = INTEGER(AS_LOGICAL(Rmean))[0]; + int *table = NULL; + // int nthr = INTEGER(threads)[0]; + mat = REAL(Rmat); + if (isMatrix(Rmat)) + { + cols = 
ncols(Rmat); + rows = nrows(Rmat); + } + else + { + cols = 1; + rows = len; + } + if (length(Rfactor) != rows) + error("matrix/vector must have same length as factor"); + + nlev = nlevels(Rfactor); + + for (int i = 0; i < rows; i++) + { + if (f[i] < 1 || ISNA(f[i])) + error("Missing levels not supported"); + if (f[i] > nlev) + error("Level for %d is %d, too large %d", i, f[i], nlev); + } + + if (mean) + { + table = (int *)R_alloc(nlev, sizeof(int)); + for (int i = 0; i < nlev; i++) + table[i] = 0; + for (int i = 0; i < rows; i++) + table[f[i] - 1]++; + } + + // Allocate resultant matrix + PROTECT(res = allocMatrix(REALSXP, nlev, cols)); + + SEXP dn; + SEXP rdn = GET_DIMNAMES(Rmat); + PROTECT(dn = allocVector(VECSXP, 2)); + SET_VECTOR_ELT(dn, 0, GET_LEVELS(Rfactor)); + if (!isNull(rdn)) + SET_VECTOR_ELT(dn, 1, VECTOR_ELT(rdn, 1)); + SET_DIMNAMES(res, dn); + UNPROTECT(1); + + mres = REAL(res); + // Now, run through column by column, summing the levels + // #pragma omp parallel for num_threads(nthr) + memset(mres, 0, nlev * cols * sizeof(double)); + mres--; // factor levels are 1-based + for (int k = 0; k < cols; k++, mres += nlev) + { + for (int i = 0; i < rows; i++) + { + mres[f[i]] += *mat++; + } + } + if (mean) + { + mres = REAL(res); + for (int k = 0; k < cols; k++, mres += nlev) + for (int i = 0; i < nlev; i++) + mres[i] /= table[i]; + } + UNPROTECT(1); + return (res); +} diff --git a/src/demean.c b/src/demean.c index 8bea734..b04ec5c 100644 --- a/src/demean.c +++ b/src/demean.c @@ -1,5 +1,5 @@ #include "lfe.h" -/* Need sprintf */ +/* Need snprintf */ #include /* @@ -152,7 +152,7 @@ static int demean(int N, double *vec, double *weights,int *scale, if(t < 0.49) { char buf[256]; if(nm2 > 1e-15*nm) { - sprintf(buf,"Demeaning of vec %d failed after %d iterations, t=%.1e, nm2=%.1enm\n", + snprintf(buf,sizeof(buf),"Demeaning of vec %d failed after %d iterations, t=%.1e, nm2=%.1enm\n", vecnum,iter,t,nm2/nm); pushmsg(buf,lock); okconv = 0; @@ -184,7 +184,7 @@ static int 
demean(int N, double *vec, double *weights,int *scale, if(!gkacc && c >= 1.0 && iter > 100) { char buf[256]; - sprintf(buf,"Demeaning of vec %d failed after %d iterations, c-1=%.0e, delta=%.1e\n", + snprintf(buf,sizeof(buf),"Demeaning of vec %d failed after %d iterations, c-1=%.0e, delta=%.1e\n", vecnum,iter,c-1.0,delta); pushmsg(buf,lock); okconv = 0; @@ -219,7 +219,7 @@ static int demean(int N, double *vec, double *weights,int *scale, localtime_r(&arriv,&tmarriv); #endif strftime(timbuf, sizeof(timbuf), "%c", &tmarriv); - sprintf(buf,"...centering vec %d i:%d c:%.1e d:%.1e(t:%.1e) ETA:%s\n", + snprintf(buf,sizeof(buf),"...centering vec %d i:%d c:%.1e d:%.1e(t:%.1e) ETA:%s\n", vecnum,iter,1.0-c,delta,target,timbuf); pushmsg(buf,lock); lastiter = iter; @@ -284,8 +284,6 @@ static void *demeanlist_thr(void *varg) { #ifndef USEOMP char thrname[16]; char buf[256]; - // snprintf(thrname,16, "Ct %d/%d",vecnum+1, arg->K); - // cran whines about truncation snprintf(buf,256,"Ct %d/%d",vecnum+1, arg->K); buf[15] = 0; memcpy(thrname,buf,16); @@ -306,7 +304,7 @@ static void *demeanlist_thr(void *varg) { (arg->done)++; if(arg->quiet > 0 && now > arg->last + arg->quiet && arg->K > 1) { char buf[256]; - sprintf(buf,"...finished centering %d of %d vectors in %d seconds\n", + snprintf(buf,sizeof(buf),"...finished centering %d of %d vectors in %d seconds\n", arg->done,arg->K,(int)(now-arg->start)); arg->last = now; UNLOCK(arg->lock); diff --git a/src/factor.c b/src/factor.c index d6bfb8b..659879f 100644 --- a/src/factor.c +++ b/src/factor.c @@ -1,354 +1,424 @@ -/* - $Id: factor.c 1662 2015-03-20 15:04:22Z sgaure $ -*/ - #include "lfe.h" -static void invertfactor(FACTOR *f,int N) { +static void invertfactor(FACTOR *f, int N) +{ int nlev = f->nlevels; int *curoff; int i; - f->ii = (int*) R_alloc(nlev+1,sizeof(int)); - f->gpl = (int*) R_alloc(N,sizeof(int)); + f->ii = (int *)R_alloc(nlev + 1, sizeof(int)); + f->gpl = (int *)R_alloc(N, sizeof(int)); - 
memset(f->ii,0,sizeof(int)*(nlev+1)); + memset(f->ii, 0, sizeof(int) * (nlev + 1)); /* find sizes of groups */ - for(i = 0; i < N; i++) { + for (i = 0; i < N; i++) + { int gp = f->group[i]; - if(gp < 1) error("Factors can not have missing levels"); + if (gp < 1) + error("Factors can not have missing levels"); f->ii[gp]++; } /* cumulative */ - for(i = 1; i < nlev+1; i++) { - f->ii[i] += f->ii[i-1]; + for (i = 1; i < nlev + 1; i++) + { + f->ii[i] += f->ii[i - 1]; } - curoff = Calloc(nlev+1,int); - for(i = 0; i < N; i++) { - int gp = f->group[i]-1; - f->gpl[f->ii[gp]+curoff[gp]] = i; + curoff = Calloc(nlev + 1, int); + for (i = 0; i < N; i++) + { + int gp = f->group[i] - 1; + f->gpl[f->ii[gp] + curoff[gp]] = i; curoff[gp]++; } Free(curoff); } -FACTOR** makefactors(SEXP flist, int allowmissing, double *weights) { +FACTOR **makefactors(SEXP flist, int allowmissing, double *weights) +{ FACTOR **factors; int numfac = LENGTH(flist); - int N=0; + int N = 0; int oneiter = 0; numfac = 0; - for(int i = 0; i < LENGTH(flist); i++) { - SEXP sf = VECTOR_ELT(flist,i); - SEXP xattr = getAttrib(sf,install("x")); - if(isNull(xattr)) { + for (int i = 0; i < LENGTH(flist); i++) + { + SEXP sf = VECTOR_ELT(flist, i); + SEXP xattr = getAttrib(sf, install("x")); + if (isNull(xattr)) + { numfac++; continue; - } else if(LENGTH(flist) == 1) { + } + else if (LENGTH(flist) == 1) + { SEXP ortho = getAttrib(xattr, install("ortho")); - if(isLogical(ortho)) oneiter = LOGICAL(ortho)[0]; + if (isLogical(ortho)) + oneiter = LOGICAL(ortho)[0]; } - if(!isMatrix(xattr)) { + if (!isMatrix(xattr)) + { numfac++; continue; } numfac += ncols(xattr); } - if(!oneiter) { + if (!oneiter) + { SEXP Roneiter = getAttrib(flist, install("oneiter")); - if(isLogical(Roneiter)) oneiter = LOGICAL(Roneiter)[0]; + if (isLogical(Roneiter)) + oneiter = LOGICAL(Roneiter)[0]; } - factors = (FACTOR**) R_alloc(numfac+1,sizeof(FACTOR*)); + factors = (FACTOR **)R_alloc(numfac + 1, sizeof(FACTOR *)); factors[numfac] = NULL; int 
truefac = 0; - for(int i = 0; i < LENGTH(flist); i++) { + for (int i = 0; i < LENGTH(flist); i++) + { int len; FACTOR *f; - len = LENGTH(VECTOR_ELT(flist,i)); - if(i == 0) { + len = LENGTH(VECTOR_ELT(flist, i)); + if (i == 0) + { N = len; - } else if(len != N) { - error("All factors must have the same length %d %d",len,N); } - - f = factors[truefac++] = (FACTOR*) R_alloc(1,sizeof(FACTOR)); - f->group = INTEGER(VECTOR_ELT(flist,i)); - f->nlevels = LENGTH(getAttrib(VECTOR_ELT(flist,i),R_LevelsSymbol)); - if(f->nlevels <= 0) error("factor %d in list has no levels\n",i+1); + else if (len != N) + { + error("All factors must have the same length %d %d", len, N); + } + + f = factors[truefac++] = (FACTOR *)R_alloc(1, sizeof(FACTOR)); + f->group = INTEGER(VECTOR_ELT(flist, i)); + f->nlevels = LENGTH(getAttrib(VECTOR_ELT(flist, i), R_LevelsSymbol)); + if (f->nlevels <= 0) + error("factor %d in list has no levels\n", i + 1); f->oneiter = oneiter; - SEXP xattr = getAttrib(VECTOR_ELT(flist,i),install("x")); - if(isNull(xattr)) { + SEXP xattr = getAttrib(VECTOR_ELT(flist, i), install("x")); + if (isNull(xattr)) + { f->x = NULL; - } else { - if(isMatrix(xattr)) { - if(nrows(xattr) != len) { - error("Factor interaction terms (%d) must have the same length (%d) as the factor", - LENGTH(xattr),len); - } - truefac--; - for(int j = 0; j < ncols(xattr); j++) { - FACTOR *g = factors[truefac++] = (FACTOR*) R_alloc(1,sizeof(FACTOR)); - g->group = f->group; - g->nlevels = f->nlevels; - g->oneiter = f->oneiter; - g->x = &REAL(xattr)[j*(mybigint_t)nrows(xattr)]; - } - } else { - if(LENGTH(xattr) != len) { - error("Factor interaction terms (%d) must have the same length (%d) as the factor", - LENGTH(xattr),len); - } - f->x = REAL(xattr); + } + else + { + if (isMatrix(xattr)) + { + if (nrows(xattr) != len) + { + error("Factor interaction terms (%d) must have the same length (%d) as the factor", + LENGTH(xattr), len); + } + truefac--; + for (int j = 0; j < ncols(xattr); j++) + { + FACTOR *g = 
factors[truefac++] = (FACTOR *)R_alloc(1, sizeof(FACTOR)); + g->group = f->group; + g->nlevels = f->nlevels; + g->oneiter = f->oneiter; + g->x = &REAL(xattr)[j * (mybigint_t)nrows(xattr)]; + } + } + else + { + if (LENGTH(xattr) != len) + { + error("Factor interaction terms (%d) must have the same length (%d) as the factor", + LENGTH(xattr), len); + } + f->x = REAL(xattr); } } } - /* Make array for holding precomputed group levels + /* Make array for holding precomputed group levels Now, what about entries which don't belong to a group I.e. NA entries, how is that handled, I wonder. seems to be negative. Anyway, we fail on them. No, we don't */ - - for(int i = 0; i < truefac; i++) { + + for (int i = 0; i < truefac; i++) + { FACTOR *f = factors[i]; - f->gpsize = (double *)R_alloc(f->nlevels,sizeof(double)); - f->invgpsize = (double *)R_alloc(f->nlevels,sizeof(double)); - memset(f->gpsize,0,f->nlevels*sizeof(double)); + f->gpsize = (double *)R_alloc(f->nlevels, sizeof(double)); + f->invgpsize = (double *)R_alloc(f->nlevels, sizeof(double)); + memset(f->gpsize, 0, f->nlevels * sizeof(double)); /* first count it */ - for(int j = 0; j < N; j++) { - /* skip entries without a group, do we need that? */ - /* if(f->group[j] < 1) error("Factors can't have missing levels"); */ - if(f->group[j] > 0) { - double w = (f->x == NULL) ? (weights==NULL ? 1.0 : weights[j]) : - (weights==NULL ? f->x[j] : f->x[j]*weights[j]); - f->gpsize[f->group[j]-1] += w*w; - } else { - if(!allowmissing) error("Factors can't have missing levels"); + for (int j = 0; j < N; j++) + { + /* skip entries without a group, do we need that? */ + /* if(f->group[j] < 1) error("Factors can't have missing levels"); */ + if (f->group[j] > 0) + { + double w = (f->x == NULL) ? (weights == NULL ? 1.0 : weights[j]) : (weights == NULL ? 
f->x[j] : f->x[j] * weights[j]); + f->gpsize[f->group[j] - 1] += w * w; + } + else + { + if (!allowmissing) + error("Factors can't have missing levels"); } } /* then invert it, it's much faster to multiply than to divide */ /* in the iterations */ - for(int j = 0; j < f->nlevels; j++) { - f->invgpsize[j] = 1.0/f->gpsize[j]; + for (int j = 0; j < f->nlevels; j++) + { + f->invgpsize[j] = 1.0 / f->gpsize[j]; } } - return(factors); + return (factors); } - /* This one is a bit tricky. We could do it quite elegantly recursively, but we would suffer from a deep call-stack. Hence, we make our own stack and push/pop in a loop */ -static int Components(int **vertices, FACTOR **factors, int K) { +static int Components(int **vertices, FACTOR **factors, int K) +{ int stacklevel = 0; int *stack; - int curfac, curvert, curcomp,candvert; - int startfac=0,startvert=0; - int ii=0,i; - int numvert=0; + int curfac, curvert, curcomp, candvert; + int startfac = 0, startvert = 0; + int ii = 0, i; + int numvert = 0; /* How big a stack ? 
*/ /* The number of vertices */ - for(i = 0; i < K; i++) numvert += factors[i]->nlevels; + for (i = 0; i < K; i++) + numvert += factors[i]->nlevels; /* Never used in threads, so use R's Calloc */ - stack = Calloc(numvert*4,int); + stack = Calloc(numvert * 4, int); #define PUSH(x) stack[stacklevel++] = x #define POP(x) x = stack[--stacklevel] -#define PUSHALL {PUSH(startvert); PUSH(startfac); PUSH(curfac); PUSH(ii);} -#define POPALL {POP(ii); POP(curfac); POP(startfac); POP(startvert);} +#define PUSHALL \ + { \ + PUSH(startvert); \ + PUSH(startfac); \ + PUSH(curfac); \ + PUSH(ii); \ + } +#define POPALL \ + { \ + POP(ii); \ + POP(curfac); \ + POP(startfac); \ + POP(startvert); \ + } curcomp = 1; candvert = 0; - do { + do + { curvert = candvert; curfac = 0; /* Find the entire component */ - while(1) { - /* At the top here, curfac,curvert is a candidate for marking off as - a vertex in curcomp - For each iteration: - - If it's not marked, mark it, push on stack, go to first datapoint - If it's already marked, go to next datapoint for the vertex (incidence matrix) - If data-points are exhausted, go to next factor, start over with datapoints. - If factors are exhausted, pop the stack - If final stack-frame, we're done with component. + while (1) + { + /* At the top here, curfac,curvert is a candidate for marking off as + a vertex in curcomp + For each iteration: + + If it's not marked, mark it, push on stack, go to first datapoint + If it's already marked, go to next datapoint for the vertex (incidence matrix) + If data-points are exhausted, go to next factor, start over with datapoints. + If factors are exhausted, pop the stack + If final stack-frame, we're done with component. 
*/ - - if(vertices[curfac][curvert] == 0) { - /* Mark new vertex, find incidence list */ - vertices[curfac][curvert] = curcomp; - PUSHALL; - startvert = curvert; - startfac = curfac; - curfac = (startfac+1)%K; - ii = factors[startfac]->ii[startvert]; - } else { - /* Been there, try next in group */ - ii++; + + if (vertices[curfac][curvert] == 0) + { + /* Mark new vertex, find incidence list */ + vertices[curfac][curvert] = curcomp; + PUSHALL; + startvert = curvert; + startfac = curfac; + curfac = (startfac + 1) % K; + ii = factors[startfac]->ii[startvert]; + } + else + { + /* Been there, try next in group */ + ii++; } - if(ii >= factors[startfac]->ii[startvert+1]) { - /* No more, move to next factor */ - curfac = (curfac + 1) % K; - if(curfac == startfac) { - /* This is where we began, pop */ - /* No more neighbours, go back to previous */ - POPALL; - /* Get outta here */ - if(0 == stacklevel) break; - } else { - /* start over with data-points */ - ii = factors[startfac]->ii[startvert]; - } + if (ii >= factors[startfac]->ii[startvert + 1]) + { + /* No more, move to next factor */ + curfac = (curfac + 1) % K; + if (curfac == startfac) + { + /* This is where we began, pop */ + /* No more neighbours, go back to previous */ + POPALL; + /* Get outta here */ + if (0 == stacklevel) + break; + } + else + { + /* start over with data-points */ + ii = factors[startfac]->ii[startvert]; + } } - curvert = factors[curfac]->group[factors[startfac]->gpl[ii]]-1; + curvert = factors[curfac]->group[factors[startfac]->gpl[ii]] - 1; } /* Find next component */ - while(candvert < factors[0]->nlevels && vertices[0][candvert] != 0) candvert++; + while (candvert < factors[0]->nlevels && vertices[0][candvert] != 0) + candvert++; curcomp++; - } while(candvert < factors[0]->nlevels); + } while (candvert < factors[0]->nlevels); Free(stack); - return(curcomp-1); + return (curcomp - 1); #undef PUSH #undef POP #undef PUSHALL #undef POPALL } - + // Algorithm from: // "A Note on the Determination of 
Connectedness in an N-Way Cross Classification" // D.L. Weeks and D.R. Williams, Technometrics, vol 6 no 3, August 1964 // There probably exists faster algorithms, this one is quite slow -static void wwcomp(FACTOR *factors[], int numfac, int N, int *newlevels) { - int level = 0; - int chead = 0; - int *newlist = Calloc(N,int); - int *oldlist = Calloc(N,int); - int oldstart = 0; - // For cache-efficiency, make a numfac x N matrix of the factors - int *facmat = Calloc(numfac*N, int); - - for(mysize_t i = 0; i < N; i++) { - int *obsp = &facmat[i*numfac]; - newlevels[i] = 0; - oldlist[i] = i; - for(int j = 0; j < numfac; j++) { - obsp[j] = factors[j]->group[i]; - } - } - while(oldstart < N) { - int newidx; - // set component number for first node in component - // find the next we haven't checked, it's oldlist[oldstart] - // increase oldstart by one - level++; - chead = oldlist[oldstart++]; - // put it as the first element in our newlist - newlist[0] = chead; - newlevels[chead] = level; - newidx = 1; - // loop over the list of newly added nodes, including the head - // Note that we may increase newidx during the loop - for(int i = 0; i < newidx; i++) { - mysize_t newnode = newlist[i]; - int *newp = &facmat[newnode*numfac]; - // search for observations with distance 1 from newnode, mark them with level - for(int jidx = oldstart; jidx < N; jidx++) { - mysize_t trynode = oldlist[jidx]; - int *tryp = &facmat[trynode*numfac]; - int dist = 0; - // compute distance - for(int fi = 0; fi < numfac && dist < 2; fi++) - dist += (newp[fi] != tryp[fi]); - //dist += (factors[fi]->group[newnode] != factors[fi]->group[trynode]); - // if close, set its level, add it to the list, move the start node - // to the empty place in oldlist. 
- if(dist < 2) { - newlevels[trynode] = level; - newlist[newidx++] = trynode; - oldlist[jidx] = oldlist[oldstart++]; - } - } - } - } - Free(facmat); - Free(newlist); - Free(oldlist); - } +static void wwcomp(FACTOR *factors[], int numfac, int N, int *newlevels) +{ + int level = 0; + int chead = 0; + int *newlist = Calloc(N, int); + int *oldlist = Calloc(N, int); + int oldstart = 0; + // For cache-efficiency, make a numfac x N matrix of the factors + int *facmat = Calloc(numfac * N, int); + + for (mysize_t i = 0; i < N; i++) + { + int *obsp = &facmat[i * numfac]; + newlevels[i] = 0; + oldlist[i] = i; + for (int j = 0; j < numfac; j++) + { + obsp[j] = factors[j]->group[i]; + } + } + while (oldstart < N) + { + int newidx; + // set component number for first node in component + // find the next we haven't checked, it's oldlist[oldstart] + // increase oldstart by one + level++; + chead = oldlist[oldstart++]; + // put it as the first element in our newlist + newlist[0] = chead; + newlevels[chead] = level; + newidx = 1; + // loop over the list of newly added nodes, including the head + // Note that we may increase newidx during the loop + for (int i = 0; i < newidx; i++) + { + mysize_t newnode = newlist[i]; + int *newp = &facmat[newnode * numfac]; + // search for observations with distance 1 from newnode, mark them with level + for (int jidx = oldstart; jidx < N; jidx++) + { + mysize_t trynode = oldlist[jidx]; + int *tryp = &facmat[trynode * numfac]; + int dist = 0; + // compute distance + for (int fi = 0; fi < numfac && dist < 2; fi++) + dist += (newp[fi] != tryp[fi]); + // dist += (factors[fi]->group[newnode] != factors[fi]->group[trynode]); + // if close, set its level, add it to the list, move the start node + // to the empty place in oldlist. + if (dist < 2) + { + newlevels[trynode] = level; + newlist[newidx++] = trynode; + oldlist[jidx] = oldlist[oldstart++]; + } + } + } + } + Free(facmat); + Free(newlist); + Free(oldlist); +} /* R entry-point for conncomp. 
Takes a list of factors as input. */ -SEXP MY_wwcomp(SEXP flist) { - int numfac, N; - FACTOR **factors; - SEXP result; +SEXP MY_wwcomp(SEXP flist) +{ + int numfac, N; + FACTOR **factors; + SEXP result; - numfac = LENGTH(flist); - if(numfac < 2) error("At least two factors must be specified"); + if (numfac < 2) + error("At least two factors must be specified"); - N = LENGTH(VECTOR_ELT(flist,0)); - for(int i = 0; i < numfac; i++) { - if(N != LENGTH(VECTOR_ELT(flist,i))) + N = LENGTH(VECTOR_ELT(flist, 0)); + for (int i = 0; i < numfac; i++) + { + if (N != LENGTH(VECTOR_ELT(flist, i))) error("Factors must have the same length"); } - factors = (FACTOR**) R_alloc(numfac,sizeof(FACTOR*)); - for(int i = 0; i < numfac; i++) { + factors = (FACTOR **)R_alloc(numfac, sizeof(FACTOR *)); + for (int i = 0; i < numfac; i++) + { FACTOR *f; - factors[i] = (FACTOR*) R_alloc(1,sizeof(FACTOR)); + factors[i] = (FACTOR *)R_alloc(1, sizeof(FACTOR)); f = factors[i]; - f->group = INTEGER(VECTOR_ELT(flist,i)); + f->group = INTEGER(VECTOR_ELT(flist, i)); } - PROTECT(result = allocVector(INTSXP,N)); + PROTECT(result = allocVector(INTSXP, N)); int *fac = INTEGER(result); wwcomp(factors, numfac, N, fac); // Now it's time to order the levels by decreasing size, so let's compute the sizes int levels = 0; - for(int i = 0; i < N; i++) if(fac[i] > levels) levels = fac[i]; - double *levsize = (double*) R_alloc(levels, sizeof(double)); - int *index = (int*) R_alloc(levels, sizeof(int)); - for(int i = 0; i < levels; i++) { + for (int i = 0; i < N; i++) + if (fac[i] > levels) + levels = fac[i]; + double *levsize = (double *)R_alloc(levels, sizeof(double)); + int *index = (int *)R_alloc(levels, sizeof(int)); + for (int i = 0; i < levels; i++) + { levsize[i] = 0.0; index[i] = i; } - for(int i = 0; i < N; i++) levsize[fac[i]-1] = levsize[fac[i]-1]+1; - revsort(levsize,index,levels); - int *rindex = (int*) R_alloc(levels, sizeof(int)); - for(int i = 0; i < levels; i++) rindex[index[i]] = i; - for(int i = 0; 
i < N; i++) { - fac[i] = rindex[fac[i]-1]+1; + for (int i = 0; i < N; i++) + levsize[fac[i] - 1] = levsize[fac[i] - 1] + 1; + revsort(levsize, index, levels); + int *rindex = (int *)R_alloc(levels, sizeof(int)); + for (int i = 0; i < levels; i++) + rindex[index[i]] = i; + for (int i = 0; i < N; i++) + { + fac[i] = rindex[fac[i] - 1] + 1; } - UNPROTECT(1); return result; } -/* -Then for finding connection components +/* +Then for finding connection components From R we take a list of factors, we return a factor of the same length with the connection components */ -SEXP MY_conncomp(SEXP flist) { +SEXP MY_conncomp(SEXP flist) +{ int numfac; int i; - + int N; FACTOR **factors; int *group; @@ -361,69 +431,79 @@ SEXP MY_conncomp(SEXP flist) { int *idx; numfac = LENGTH(flist); - if(numfac < 2) error("At least two factors must be specified"); - N = LENGTH(VECTOR_ELT(flist,0)); - for(i = 0; i < numfac; i++) { - if(N != LENGTH(VECTOR_ELT(flist,i))) + if (numfac < 2) + error("At least two factors must be specified"); + N = LENGTH(VECTOR_ELT(flist, 0)); + for (i = 0; i < numfac; i++) + { + if (N != LENGTH(VECTOR_ELT(flist, i))) error("Factors must have the same length"); } - factors = (FACTOR**) R_alloc(numfac,sizeof(FACTOR*)); + factors = (FACTOR **)R_alloc(numfac, sizeof(FACTOR *)); PROTECT(flist = AS_LIST(flist)); - for(i = 0; i < numfac; i++) { + for (i = 0; i < numfac; i++) + { FACTOR *f; - factors[i] = (FACTOR*) R_alloc(1,sizeof(FACTOR)); + factors[i] = (FACTOR *)R_alloc(1, sizeof(FACTOR)); f = factors[i]; - f->group = INTEGER(VECTOR_ELT(flist,i)); - f->nlevels = LENGTH(getAttrib(VECTOR_ELT(flist,i),R_LevelsSymbol)); - if(f->nlevels == 0) error("Factor %s has zero levels", CHAR(STRING_ELT(GET_NAMES(flist),i))); - invertfactor(f,N); + f->group = INTEGER(VECTOR_ELT(flist, i)); + f->nlevels = LENGTH(getAttrib(VECTOR_ELT(flist, i), R_LevelsSymbol)); + if (f->nlevels == 0) + error("Factor %s has zero levels", CHAR(STRING_ELT(GET_NAMES(flist), i))); + invertfactor(f, N); } /* 
Create vertices */ - vertices = (int**) R_alloc(numfac,sizeof(int*)); + vertices = (int **)R_alloc(numfac, sizeof(int *)); /* Create arrays for them */ - for(i = 0; i < numfac; i++) { - vertices[i] = (int*) R_alloc(factors[i]->nlevels,sizeof(int)); + for (i = 0; i < numfac; i++) + { + vertices[i] = (int *)R_alloc(factors[i]->nlevels, sizeof(int)); /* Assign no component to them*/ - memset(vertices[i],0,sizeof(int)*factors[i]->nlevels); + memset(vertices[i], 0, sizeof(int) * factors[i]->nlevels); } /* Do the stuff */ - comps = Components(vertices,factors,numfac); + comps = Components(vertices, factors, numfac); /* allocate result structure */ - PROTECT(result = allocVector(INTSXP,N)); + PROTECT(result = allocVector(INTSXP, N)); resgroup = INTEGER(result); group = factors[0]->group; - for(i = 0; i < N; i++) { - resgroup[i] = vertices[0][group[i]-1]; + for (i = 0; i < N; i++) + { + resgroup[i] = vertices[0][group[i] - 1]; } - /* the levels should be ordered by decreasing size. How do we do this? + /* the levels should be ordered by decreasing size. How do we do this? Hmm, we should have a look at revsort supplied by R. - There must be an easier way, I'm clumsy today. + There must be an easier way, I'm clumsy today. 
*/ - gpsiz = Calloc(comps,double); - idx = Calloc(comps,int); - - for(i = 0; i < comps; i++) idx[i] = i; - for(i = 0; i < N; i++) { - gpsiz[resgroup[i]-1]++; + gpsiz = Calloc(comps, double); + idx = Calloc(comps, int); + + for (i = 0; i < comps; i++) + idx[i] = i; + for (i = 0; i < N; i++) + { + gpsiz[resgroup[i] - 1]++; } - revsort(gpsiz,idx,comps); + revsort(gpsiz, idx, comps); Free(gpsiz); - levtrl = Calloc(comps,int); - for(i = 0; i < comps; i++) levtrl[idx[i]] = i+1; + levtrl = Calloc(comps, int); + for (i = 0; i < comps; i++) + levtrl[idx[i]] = i + 1; Free(idx); - for(i = 0; i < N; i++) { - resgroup[i] = levtrl[resgroup[i]-1]; + for (i = 0; i < N; i++) + { + resgroup[i] = levtrl[resgroup[i] - 1]; } Free(levtrl); UNPROTECT(2); - return(result); + return (result); } diff --git a/src/kaczmarz.c b/src/kaczmarz.c index 222218a..1268ce9 100644 --- a/src/kaczmarz.c +++ b/src/kaczmarz.c @@ -1,13 +1,11 @@ -/* - $Id: kaczmarz.c 1915 2016-04-05 12:56:27Z sgaure $ -*/ #include "lfe.h" -/* Need sprintf */ -#include +/* Need snprintf */ +#include -typedef struct { +typedef struct +{ int nowdoing; - + double **source; double **target; FACTOR **factors; @@ -33,15 +31,16 @@ typedef struct { #endif } KARG; -static double kaczmarz(FACTOR *factors[],int e, mysize_t N, double *R, double *x, - double eps, mysize_t *work, int *stop, LOCK_T lock) { +static double kaczmarz(FACTOR *factors[], int e, mysize_t N, double *R, double *x, + double eps, mysize_t *work, int *stop, LOCK_T lock) +{ /* The factors define a matrix D, we will solve Dx = R There are N rows in D, each row a_i contains e non-zero entries, one for each factor, the level at that position. 
We iterate on k, start with x=0, an iteration consists of adding a multiple of row i (with i = k %% N), the multiplying coefficient - is (R[i] - (a_i,x))/e + is (R[i] - (a_i,x))/e To get better memory locality, we create an (e X N)-matrix with the non-zero indices @@ -54,9 +53,9 @@ static double kaczmarz(FACTOR *factors[],int e, mysize_t N, double *R, double *x */ double norm2; - double prevdiff,neweps; - double c,sd; - int iter=0; + double prevdiff, neweps; + double c, sd; + int iter = 0; int ie; int newN; @@ -69,44 +68,53 @@ static double kaczmarz(FACTOR *factors[],int e, mysize_t N, double *R, double *x */ int hasinteract = 0; - for(int i = 0; i < e; i++) if(NULL != factors[i]->x) hasinteract = 1; + for (int i = 0; i < e; i++) + if (NULL != factors[i]->x) + hasinteract = 1; mysize_t workpos = 0; /* Do the doubles first to keep alignment */ - double *newR = (double *) work; - mysize_t *indices = (mysize_t *) &work[workpos += N*sizeof(double)/sizeof(mysize_t)]; - mysize_t *perm = (mysize_t *) &work[workpos += e*N]; - mysize_t *prev = (mysize_t *) &work[workpos += N]; - mysize_t *this = (mysize_t *) &work[workpos += e]; - - if(!hasinteract) for(int i = 0; i < e; i++) prev[i] = 0; + double *newR = (double *)work; + mysize_t *indices = (mysize_t *)&work[workpos += N * sizeof(double) / sizeof(mysize_t)]; + mysize_t *perm = (mysize_t *)&work[workpos += e * N]; + mysize_t *prev = (mysize_t *)&work[workpos += N]; + mysize_t *this = (mysize_t *)&work[workpos += e]; + + if (!hasinteract) + for (int i = 0; i < e; i++) + prev[i] = 0; newN = 0; ie = 0; - for(mysize_t i = 0; i < N; i++) { + for (mysize_t i = 0; i < N; i++) + { perm[i] = i; - for(int j = 0; j < e; j++) { + for (int j = 0; j < e; j++) + { this[j] = factors[j]->group[i]; } - if(hasinteract || (memcmp(this,prev,e*sizeof(int)) != 0) ) { - int nlev=0; + if (hasinteract || (memcmp(this, prev, e * sizeof(int)) != 0)) + { + int nlev = 0; /* not duplicate, store in indices */ - - for(int j = 0; j < e; j++) { - 
indices[ie+j] = this[j]-1 + nlev; - nlev += factors[j]->nlevels; + + for (int j = 0; j < e; j++) + { + indices[ie + j] = this[j] - 1 + nlev; + nlev += factors[j]->nlevels; } newR[newN] = R[i]; newN++; ie += e; - if(!hasinteract) memcpy(prev,this,e*sizeof(int)); + if (!hasinteract) + memcpy(prev, this, e * sizeof(int)); } } - /* + /* At this point we should perhaps randomly shuffle the equations. We don't know when this ought to be done, but we've only seen it when there are many factors. - We want to use unif_rand to be able to get reproducible results, + We want to use unif_rand to be able to get reproducible results, at least for single-threaded things, and keeping the random number generator in the same state when we return to R. The unif_rand isn't concurrency-safe, so protect with mutex. @@ -116,14 +124,17 @@ static double kaczmarz(FACTOR *factors[],int e, mysize_t N, double *R, double *x of the routine, only the speed. */ LOCK(lock); - if(e > 1) { - for(mysize_t i = newN-1; i > 0; i--) { + if (e > 1) + { + for (mysize_t i = newN - 1; i > 0; i--) + { mysize_t j; /* Pick j between 0 and i inclusive */ - j = (mysize_t) floor((i+1) * unif_rand()); - if(j == i) continue; + j = (mysize_t)floor((i + 1) * unif_rand()); + if (j == i) + continue; /* exchange newR[i] and newR[j] - as well as indices[i*e:i*e+e-1] and indices[j*e:j*e+e-1] + as well as indices[i*e:i*e+e-1] and indices[j*e:j*e+e-1] */ double dtmp = newR[j]; newR[j] = newR[i]; @@ -131,111 +142,121 @@ static double kaczmarz(FACTOR *factors[],int e, mysize_t N, double *R, double *x mysize_t itmp = perm[j]; perm[j] = perm[i]; perm[i] = itmp; - for(mysize_t k = 0; k < e; k++) { - mysize_t itmp; - itmp = indices[j*e+k]; - indices[j*e+k] = indices[i*e+k]; - indices[i*e+k] = itmp; + for (mysize_t k = 0; k < e; k++) + { + mysize_t itmp; + itmp = indices[j * e + k]; + indices[j * e + k] = indices[i * e + k]; + indices[i * e + k] = itmp; } } } UNLOCK(lock); /* Then, do the Kaczmarz iterations */ - norm2 =0.0; - 
for(mysize_t i = 0; i < newN; i++) norm2 += newR[i]*newR[i]; + norm2 = 0.0; + for (mysize_t i = 0; i < newN; i++) + norm2 += newR[i] * newR[i]; norm2 = sqrt(norm2); - prevdiff = 2*norm2; - neweps = eps*norm2; - + prevdiff = 2 * norm2; + neweps = eps * norm2; - do { + do + { mysize_t ie = 0; /* equals i*e; integer multiplication is slow, keep track instead */ double diff = 0.0; - for(mysize_t i = 0; i < newN; i++,ie+=e) { + for (mysize_t i = 0; i < newN; i++, ie += e) + { const mysize_t ip = perm[i]; double upd = 0.0; double ai2 = 0.0; upd = newR[i]; - + /* Subtract inner product */ - for(int j = 0; j < e; j++) { - const mysize_t idx = indices[ie + j]; - const double *fx = factors[j]->x; - const double w = (fx == NULL) ? 1.0 : fx[ip]; - upd -= x[idx]*w; - ai2 += w*w; + for (int j = 0; j < e; j++) + { + const mysize_t idx = indices[ie + j]; + const double *fx = factors[j]->x; + const double w = (fx == NULL) ? 1.0 : fx[ip]; + upd -= x[idx] * w; + ai2 += w * w; } /* Update */ upd /= ai2; - for(int j = 0; j < e; j++) { - const mysize_t idx = indices[ie + j]; - const double *fx = factors[j]->x; - const double w = (fx == NULL) ? upd : upd*fx[ip]; - x[idx] += w; - diff += w*w; + for (int j = 0; j < e; j++) + { + const mysize_t idx = indices[ie + j]; + const double *fx = factors[j]->x; + const double w = (fx == NULL) ? 
upd : upd * fx[ip]; + x[idx] += w; + diff += w * w; } } iter++; sd = sqrt(diff); - c = sd/prevdiff; + c = sd / prevdiff; prevdiff = sd; - if(c >= 1.0 && iter > 20) { + if (c >= 1.0 && iter > 20) + { char buf[256]; - sprintf(buf,"Kaczmarz failed in iter %d*%d, c=1-%.0e, delta=%.1e, eps=%.1e\n",iter,newN,1.0-c,sd,neweps); - pushmsg(buf,lock); + snprintf(buf, sizeof(buf), "Kaczmarz failed in iter %d*%d, c=1-%.0e, delta=%.1e, eps=%.1e\n", iter, newN, 1.0 - c, sd, neweps); + pushmsg(buf, lock); break; } #ifdef NOTHREADS R_CheckUserInterrupt(); #else - if(*stop != 0) return(0); + if (*stop != 0) + return (0); #endif - } while(sd >= neweps*(1.0-c) && neweps > 1e-15); + } while (sd >= neweps * (1.0 - c) && neweps > 1e-15); - return(sd); + return (sd); } #ifdef WIN -DWORD WINAPI kaczmarz_thr(LPVOID varg) { +DWORD WINAPI kaczmarz_thr(LPVOID varg) +{ #else -static void *kaczmarz_thr(void *varg) { +static void *kaczmarz_thr(void *varg) +{ #endif - KARG *arg = (KARG*) varg; + KARG *arg = (KARG *)varg; int myid; int vecnum; /* Get a thread id */ LOCK(arg->lock); myid = arg->threadnum++; UNLOCK(arg->lock); - while(1) { + while (1) + { LOCK(arg->lock); vecnum = arg->nowdoing++; UNLOCK(arg->lock); - if(vecnum >= arg->numvec) break; + if (vecnum >= arg->numvec) + break; #ifdef HAVE_THREADNAME char thrname[16]; - // cran whines about truncation here, that's the &%#@/%! 
purpose - //snprintf(thrname, 16, "Kz %d/%d",vecnum+1, arg->numvec); char buf[256]; - snprintf(buf,256,"Kz %d/%d",vecnum+1, arg->numvec); + snprintf(buf, 256, "Kz %d/%d", vecnum + 1, arg->numvec); buf[15] = 0; - memcpy(thrname,buf,16); + memcpy(thrname, buf, 16); STNAME(thrname); #endif - (void) kaczmarz(arg->factors,arg->e,arg->N, - arg->source[vecnum],arg->target[vecnum], - arg->eps, arg->work[myid], &arg->stop, arg->lock); + (void)kaczmarz(arg->factors, arg->e, arg->N, + arg->source[vecnum], arg->target[vecnum], + arg->eps, arg->work[myid], &arg->stop, arg->lock); } #ifndef NOTHREADS #ifndef WIN LOCK(arg->lock); arg->running--; #ifdef HAVE_SEM - if(arg->running == 0) sem_post(&arg->finished); + if (arg->running == 0) + sem_post(&arg->finished); #endif UNLOCK(arg->lock); #endif @@ -243,7 +264,8 @@ static void *kaczmarz_thr(void *varg) { return 0; } -SEXP MY_kaczmarz(SEXP flist, SEXP vlist, SEXP Reps, SEXP initial, SEXP Rcores) { +SEXP MY_kaczmarz(SEXP flist, SEXP vlist, SEXP Reps, SEXP initial, SEXP Rcores) +{ double eps = REAL(Reps)[0]; /* double *R = REAL(RR);*/ @@ -261,7 +283,7 @@ SEXP MY_kaczmarz(SEXP flist, SEXP vlist, SEXP Reps, SEXP initial, SEXP Rcores) { double **vectors, **target; int cnt; int numthr = 1; - int protectcount=0; + int protectcount = 0; #ifndef NOTHREADS int thr; #ifdef WIN @@ -278,108 +300,127 @@ SEXP MY_kaczmarz(SEXP flist, SEXP vlist, SEXP Reps, SEXP initial, SEXP Rcores) { /* Set up the message stack */ initmsg(); #endif - if(!isNull(initial)) { + if (!isNull(initial)) + { init = REAL(initial); } - PROTECT(flist = AS_LIST(flist));protectcount++; + PROTECT(flist = AS_LIST(flist)); + protectcount++; // numfac = LENGTH(flist); factors = makefactors(flist, 0, NULL); numfac = 0; - for(FACTOR **f = factors; *f != NULL; f++) numfac++; - N = LENGTH(VECTOR_ELT(flist,0)); - for(int i = 0; i < numfac; i++) + for (FACTOR **f = factors; *f != NULL; f++) + numfac++; + N = LENGTH(VECTOR_ELT(flist, 0)); + for (int i = 0; i < numfac; i++) sumlev += 
factors[i]->nlevels; - - if(!isNull(initial) && LENGTH(initial) != sumlev) - error("Initial vector must have length %d, but is %d\n",sumlev, LENGTH(initial)); + if (!isNull(initial) && LENGTH(initial) != sumlev) + error("Initial vector must have length %d, but is %d\n", sumlev, LENGTH(initial)); /* Then the vectors */ - PROTECT(vlist = AS_LIST(vlist)); protectcount++; + PROTECT(vlist = AS_LIST(vlist)); + protectcount++; listlen = LENGTH(vlist); - PROTECT(reslist = NEW_LIST(listlen)); protectcount++; + PROTECT(reslist = NEW_LIST(listlen)); + protectcount++; /* First, count the number of vectors in total */ numvec = 0; - for(int i = 0; i < listlen; i++) { - SEXP elt = VECTOR_ELT(vlist,i); + for (int i = 0; i < listlen; i++) + { + SEXP elt = VECTOR_ELT(vlist, i); /* Each entry in the list is either a vector or a matrix */ - if(!isMatrix(elt)) { - if(LENGTH(elt) != N) - error("Vector length (%d) must be equal to factor length (%d)",LENGTH(elt),N); + if (!isMatrix(elt)) + { + if (LENGTH(elt) != N) + error("Vector length (%d) must be equal to factor length (%d)", LENGTH(elt), N); numvec++; - } else { - if(nrows(elt) != N) - error("Vector length must be equal to factor length %d %d ",nrows(elt),N); + } + else + { + if (nrows(elt) != N) + error("Vector length must be equal to factor length %d %d ", nrows(elt), N); numvec += ncols(elt); } } /* Allocate pointers to source vectors */ - vectors = (double **)R_alloc(numvec,sizeof(double*)); + vectors = (double **)R_alloc(numvec, sizeof(double *)); /* Allocate pointers to result vectors */ - target = (double**) R_alloc(numvec,sizeof(double*)); + target = (double **)R_alloc(numvec, sizeof(double *)); /* Loop through list again to set up result structure */ cnt = 0; - for(int i = 0; i < listlen; i++) { - SEXP elt = VECTOR_ELT(vlist,i); - if(!isReal(elt)) { - elt = PROTECT(coerceVector(elt, REALSXP)); protectcount++; + for (int i = 0; i < listlen; i++) + { + SEXP elt = VECTOR_ELT(vlist, i); + if (!isReal(elt)) + { + elt = 
PROTECT(coerceVector(elt, REALSXP)); + protectcount++; } - if(!isMatrix(elt)) { + if (!isMatrix(elt)) + { /* It's a vector */ SEXP resvec; vectors[cnt] = REAL(elt); - PROTECT(resvec = allocVector(REALSXP,sumlev)); + PROTECT(resvec = allocVector(REALSXP, sumlev)); target[cnt] = REAL(resvec); - SET_VECTOR_ELT(reslist,i,resvec); + SET_VECTOR_ELT(reslist, i, resvec); UNPROTECT(1); cnt++; - } else { + } + else + { /* It's a matrix */ int cols = ncols(elt); SEXP mtx; /* Allocate a matrix */ - PROTECT(mtx = allocMatrix(REALSXP,sumlev,cols)); - SET_VECTOR_ELT(reslist,i,mtx); + PROTECT(mtx = allocMatrix(REALSXP, sumlev, cols)); + SET_VECTOR_ELT(reslist, i, mtx); UNPROTECT(1); /* Set up pointers */ - for(int j = 0; j < cols; j++) { - vectors[cnt] = REAL(elt) + j*N; - target[cnt] = REAL(mtx) + j*sumlev; - cnt++; + for (int j = 0; j < cols; j++) + { + vectors[cnt] = REAL(elt) + j * N; + target[cnt] = REAL(mtx) + j * sumlev; + cnt++; } } } - - for(int cnt = 0; cnt < numvec; cnt++) { - if(init != 0) - for(int i = 0; i < sumlev; i++) target[cnt][i] = init[i]; + for (int cnt = 0; cnt < numvec; cnt++) + { + if (init != 0) + for (int i = 0; i < sumlev; i++) + target[cnt][i] = init[i]; else - for(int i = 0; i < sumlev; i++) target[cnt][i] = 0.0; + for (int i = 0; i < sumlev; i++) + target[cnt][i] = 0.0; } - /* set up for threading */ numthr = cores; - if(numthr > numvec) numthr = numvec; - if(numthr < 1) numthr = 1; + if (numthr > numvec) + numthr = numvec; + if (numthr < 1) + numthr = 1; GetRNGstate(); #ifndef NOTHREADS #ifdef WIN - lock = CreateMutex(NULL,FALSE,NULL); + lock = CreateMutex(NULL, FALSE, NULL); arg.lock = lock; - threads = (HANDLE*) R_alloc(numthr,sizeof(HANDLE)); - threadids = (DWORD*) R_alloc(numthr,sizeof(DWORD)); + threads = (HANDLE *)R_alloc(numthr, sizeof(HANDLE)); + threadids = (DWORD *)R_alloc(numthr, sizeof(DWORD)); #else - threads = (pthread_t*) R_alloc(numthr,sizeof(pthread_t)); + threads = (pthread_t *)R_alloc(numthr, sizeof(pthread_t)); arg.running = 
numthr; #ifdef HAVE_SEM - if(sem_init(&arg.finished,0,0) != 0) error("sem_init failed, errno=%d",errno); + if (sem_init(&arg.finished, 0, 0) != 0) + error("sem_init failed, errno=%d", errno); #endif arg.lock = &lock; #endif @@ -395,47 +436,56 @@ SEXP MY_kaczmarz(SEXP flist, SEXP vlist, SEXP Reps, SEXP initial, SEXP Rcores) { arg.numvec = numvec; arg.N = N; arg.stop = 0; - arg.work = (mysize_t**) R_alloc(numthr, sizeof(mysize_t*)); + arg.work = (mysize_t **)R_alloc(numthr, sizeof(mysize_t *)); // when allocating the work, we use mysize_t, but parts of it is accessed as double which may be // larger. So we allocate some more (8*sizeof(mysize_t) more), and adjust the address so it's aligned on a double // When using it, make sure we do all the doubles first. #ifdef NOTHREADS - arg.work[0] = (mysize_t*) R_alloc(numfac*N + N*sizeof(double)/sizeof(mysize_t) + N + 2*numfac+8, sizeof(mysize_t)); - uintptr_t amiss = (uintptr_t) arg.work[0] % sizeof(double); - if(amiss != 0) arg.work[0] = (mysize_t*) ((uintptr_t)arg.work[0] + sizeof(double)-amiss); - kaczmarz_thr((void*)&arg); + arg.work[0] = (mysize_t *)R_alloc(numfac * N + N * sizeof(double) / sizeof(mysize_t) + N + 2 * numfac + 8, sizeof(mysize_t)); + uintptr_t amiss = (uintptr_t)arg.work[0] % sizeof(double); + if (amiss != 0) + arg.work[0] = (mysize_t *)((uintptr_t)arg.work[0] + sizeof(double) - amiss); + kaczmarz_thr((void *)&arg); #else /* Do it in separate threads */ - for(thr = 0; thr < numthr; thr++) { + for (thr = 0; thr < numthr; thr++) + { // allocate some thread-specific storage, we can't use R_alloc in a thread - arg.work[thr] = (mysize_t*) R_alloc(numfac*N + N*sizeof(double)/sizeof(mysize_t) + N + 2*numfac+8, sizeof(mysize_t)); - uintptr_t amiss = (uintptr_t) arg.work[thr] % sizeof(double); - if(amiss != 0) arg.work[thr] = (mysize_t*) ((uintptr_t)arg.work[thr] + sizeof(double)-amiss); + arg.work[thr] = (mysize_t *)R_alloc(numfac * N + N * sizeof(double) / sizeof(mysize_t) + N + 2 * numfac + 8, 
sizeof(mysize_t)); + uintptr_t amiss = (uintptr_t)arg.work[thr] % sizeof(double); + if (amiss != 0) + arg.work[thr] = (mysize_t *)((uintptr_t)arg.work[thr] + sizeof(double) - amiss); #ifdef WIN - threads[thr] = CreateThread(NULL,0,kaczmarz_thr,&arg,0,&threadids[thr]); - if(0 == threads[thr]) error("Failed to create kaczmarz thread"); + threads[thr] = CreateThread(NULL, 0, kaczmarz_thr, &arg, 0, &threadids[thr]); + if (0 == threads[thr]) + error("Failed to create kaczmarz thread"); #else - int stat = pthread_create(&threads[thr],NULL,kaczmarz_thr,&arg); - if(0 != stat) error("Failed to create kaczmarz thread, stat=%d",stat); + int stat = pthread_create(&threads[thr], NULL, kaczmarz_thr, &arg); + if (0 != stat) + error("Failed to create kaczmarz thread, stat=%d", stat); #endif } /* wait for completion */ /* We want to check for interrupts regularly, and set a stop flag */ - while(1) { + while (1) + { printmsg(arg.lock); - if(arg.stop == 0 && checkInterrupt()) { + if (arg.stop == 0 && checkInterrupt()) + { REprintf("...stopping Kaczmarz threads...\n"); - arg.stop=1; + arg.stop = 1; } #ifdef WIN - if(WaitForMultipleObjects(numthr,threads,TRUE,3000) != WAIT_TIMEOUT) { - for(thr = 0; thr < numthr; thr++) { - CloseHandle(threads[thr]); + if (WaitForMultipleObjects(numthr, threads, TRUE, 3000) != WAIT_TIMEOUT) + { + for (thr = 0; thr < numthr; thr++) + { + CloseHandle(threads[thr]); } /* Print any remaining messages */ printmsg(arg.lock); @@ -445,32 +495,37 @@ SEXP MY_kaczmarz(SEXP flist, SEXP vlist, SEXP Reps, SEXP initial, SEXP Rcores) { #else { #ifndef HAVE_SEM - struct timespec atmo = {0,50000000}; + struct timespec atmo = {0, 50000000}; /* Kludge in MacOSX because no timedwait */ - - if(arg.stop == 0) nanosleep(&atmo,NULL); - if(arg.stop == 1 || arg.running == 0) { + + if (arg.stop == 0) + nanosleep(&atmo, NULL); + if (arg.stop == 1 || arg.running == 0) + { #else - struct timespec tmo = {time(NULL)+3,0}; - if(arg.stop == 1 || sem_timedwait(&arg.finished,&tmo) == 0) { 
+ struct timespec tmo = {time(NULL) + 3, 0}; + if (arg.stop == 1 || sem_timedwait(&arg.finished, &tmo) == 0) + { #endif - for(thr = 0; thr < numthr; thr++) { - (void)pthread_join(threads[thr], NULL); - } + for (thr = 0; thr < numthr; thr++) + { + (void)pthread_join(threads[thr], NULL); + } #ifdef HAVE_SEM - sem_destroy(&arg.finished); + sem_destroy(&arg.finished); #endif - /* Print any remaining messages */ - printmsg(arg.lock); - pthread_mutex_destroy(arg.lock); - break; + /* Print any remaining messages */ + printmsg(arg.lock); + pthread_mutex_destroy(arg.lock); + break; } } #endif } #endif PutRNGstate(); - if(arg.stop == 1) error("Kaczmarz interrupted"); + if (arg.stop == 1) + error("Kaczmarz interrupted"); UNPROTECT(protectcount); - return(reslist); + return (reslist); } diff --git a/src/lfe.c b/src/lfe.c index df8869c..3fdd8f0 100644 --- a/src/lfe.c +++ b/src/lfe.c @@ -1,45 +1,47 @@ -/* - $Id: lfe.c 2020 2016-04-27 05:13:51Z sgaure $ -*/ #include "lfe.h" SEXP df_string; int LFE_GLOBAL_THREADS = 1; -SEXP MY_threads(SEXP rt) { - if(LENGTH(rt) < 1) return R_NilValue; +SEXP MY_threads(SEXP rt) +{ + if (LENGTH(rt) < 1) + return R_NilValue; LFE_GLOBAL_THREADS = INTEGER(rt)[0]; return R_NilValue; } static R_CallMethodDef callMethods[] = { - {"conncomp", (DL_FUNC) &MY_conncomp, 1}, - {"wwcomp", (DL_FUNC) &MY_wwcomp, 1}, - {"demeanlist", (DL_FUNC) &MY_demeanlist, 11}, - {"kaczmarz", (DL_FUNC) &MY_kaczmarz, 5}, - {"setdimnames", (DL_FUNC) &MY_setdimnames, 2}, - {"scalecols", (DL_FUNC) &MY_scalecols, 2}, - {"pdaxpy", (DL_FUNC) &MY_pdaxpy, 3}, - {"sandwich", (DL_FUNC) &MY_sandwich, 3}, - {"piproduct", (DL_FUNC) &MY_piproduct, 2}, - {"dsyrk", (DL_FUNC) &MY_dsyrk, 4}, - {"address", (DL_FUNC) &MY_address, 1}, - // {"named", (DL_FUNC) &MY_named, 2}, - {"inplace", (DL_FUNC) &inplace, 1}, - {"rowsum", (DL_FUNC) &Crowsum, 3}, - // {"ppf", (DL_FUNC) &MY_ppf, 2}, - // {"threads", (DL_FUNC) &MY_threads, 1}, - {NULL, NULL, 0} -}; + {"conncomp", (DL_FUNC)&MY_conncomp, 1}, + 
{"wwcomp", (DL_FUNC)&MY_wwcomp, 1}, + {"demeanlist", (DL_FUNC)&MY_demeanlist, 11}, + {"kaczmarz", (DL_FUNC)&MY_kaczmarz, 5}, + {"setdimnames", (DL_FUNC)&MY_setdimnames, 2}, + {"scalecols", (DL_FUNC)&MY_scalecols, 2}, + {"pdaxpy", (DL_FUNC)&MY_pdaxpy, 3}, + {"sandwich", (DL_FUNC)&MY_sandwich, 3}, + {"piproduct", (DL_FUNC)&MY_piproduct, 2}, + {"dsyrk", (DL_FUNC)&MY_dsyrk, 4}, + {"address", (DL_FUNC)&MY_address, 1}, + // {"named", (DL_FUNC) &MY_named, 2}, + {"inplace", (DL_FUNC)&inplace, 1}, + {"rowsum", (DL_FUNC)&crowsum, 3}, + // {"ppf", (DL_FUNC) &MY_ppf, 2}, + // {"threads", (DL_FUNC) &MY_threads, 1}, + {NULL, NULL, 0}}; -void attribute_visible R_init_lfe(DllInfo *info) { +void attribute_visible R_init_lfe(DllInfo *info) +{ /* register our routines */ - (void) R_registerRoutines(info,NULL,callMethods,NULL,NULL); - (void) R_useDynamicSymbols(info, FALSE); - (void) R_forceSymbols(info, TRUE); - (void) R_PreserveObject(df_string=mkString("data.frame")); - LFE_GLOBAL_THREADS=1; + (void)R_registerRoutines(info, NULL, callMethods, NULL, NULL); + (void)R_useDynamicSymbols(info, FALSE); + (void)R_forceSymbols(info, TRUE); + (void)R_PreserveObject(df_string = mkString("data.frame")); + LFE_GLOBAL_THREADS = 1; } -void attribute_visible R_unload_lfe(DllInfo *info) { - if(info != NULL){}; //avoid pedantic warning about unused parameter - (void) R_ReleaseObject(df_string); +void attribute_visible R_unload_lfe(DllInfo *info) +{ + if (info != NULL) + { + }; // avoid pedantic warning about unused parameter + (void)R_ReleaseObject(df_string); } diff --git a/src/lfe.h b/src/lfe.h index e8e29cd..532bdaf 100644 --- a/src/lfe.h +++ b/src/lfe.h @@ -1,7 +1,3 @@ -/* - $Id: lfe.h 2020 2016-04-27 05:13:51Z sgaure $ -*/ - #include "config.h" /* different syntax in pthread_setname_np between platforms, disable for now */ @@ -17,7 +13,7 @@ #ifndef NOTHREADS #ifdef HAVE_THREADNAME -#define _GNU_SOURCE /* to find pthread_setname_np */ +#define _GNU_SOURCE /* to find pthread_setname_np */ #endif 
#include @@ -33,7 +29,7 @@ int pthread_setname_np(pthread_t thread, const char *name); #define STNAME(s) pthread_setname_np(pthread_self(), s) #elif __APPLE__ // Mac OS X: must be set from within the thread (can't specify thread ID) -int pthread_setname_np(const char*); +int pthread_setname_np(const char *); #define STNAME(s) pthread_setname_np(s) #elif __FreeBSD__ // FreeBSD & OpenBSD: function name is slightly different, and has no return value @@ -65,7 +61,6 @@ void pthread_set_name_np(pthread_t tid, const char *name); #include #include - #if defined(R_VERSION) && R_VERSION >= R_Version(3, 0, 0) typedef R_xlen_t mybigint_t; #else @@ -76,7 +71,7 @@ typedef int mybigint_t; This will increase the memory usage, so we wait until it's needed. */ -#ifdef HUGE_INT +#ifdef HUGE_INT typedef R_xlen_t mysize_t; #else typedef int mysize_t; @@ -84,42 +79,41 @@ typedef int mysize_t; /* Locking macros */ #ifdef NOTHREADS -#define LOCK_T int* +#define LOCK_T int * #define LOCK(l) #define UNLOCK(l) #else #ifdef WIN #define LOCK_T HANDLE -#define LOCK(l) WaitForSingleObject(l,INFINITE) +#define LOCK(l) WaitForSingleObject(l, INFINITE) #define UNLOCK(l) ReleaseMutex(l) #else -#define LOCK_T pthread_mutex_t* +#define LOCK_T pthread_mutex_t * #define LOCK(l) (void)pthread_mutex_lock(l) #define UNLOCK(l) (void)pthread_mutex_unlock(l) #endif #endif /* My internal definition of a factor */ -typedef struct { +typedef struct +{ /* group[i] is the level of observation i */ int *group; /* invgpsize[j] is the 1/(size of level j) */ - double *invgpsize; + double *invgpsize; double *gpsize; - int *gpl; /* group list */ - int *ii; /* indices into gpl */ + int *gpl; /* group list */ + int *ii; /* indices into gpl */ double *x; /* optional interaction covariate */ int nlevels; int oneiter; } FACTOR; - - /* Routines used in more than one source file */ -FACTOR** makefactors(SEXP flist, int allowmissing, double *weights); -int checkInterrupt(); -void initmsg(); +FACTOR **makefactors(SEXP flist, int 
allowmissing, double *weights); +extern int checkInterrupt(void); +extern void initmsg(void); void pushmsg(char *s, LOCK_T lock); void printmsg(LOCK_T lock); @@ -128,8 +122,8 @@ SEXP MY_kaczmarz(SEXP flist, SEXP vlist, SEXP Reps, SEXP initial, SEXP Rcores); SEXP MY_wwcomp(SEXP flist); SEXP MY_conncomp(SEXP flist); SEXP MY_demeanlist(SEXP vlist, SEXP flist, SEXP Ricpt, SEXP Reps, - SEXP scores, SEXP quiet, SEXP gkacc, SEXP Rmeans, - SEXP weights, SEXP Rscale, SEXP attrs); + SEXP scores, SEXP quiet, SEXP gkacc, SEXP Rmeans, + SEXP weights, SEXP Rscale, SEXP attrs); SEXP MY_scalecols(SEXP mat, SEXP vec); SEXP MY_pdaxpy(SEXP inX, SEXP inY, SEXP inbeta); SEXP MY_piproduct(SEXP inX, SEXP inY); @@ -137,9 +131,8 @@ SEXP MY_setdimnames(SEXP obj, SEXP nm); SEXP MY_dsyrk(SEXP inbeta, SEXP inC, SEXP inalpha, SEXP inA); SEXP MY_sandwich(SEXP inalpha, SEXP inbread, SEXP inmeat); SEXP MY_address(SEXP x); -//SEXP MY_named(SEXP x, SEXP n); +// SEXP MY_named(SEXP x, SEXP n); SEXP inplace(SEXP x); -SEXP Crowsum(SEXP Rmat, SEXP Rfactor, SEXP Rmean); +SEXP crowsum(SEXP Rmat, SEXP Rfactor, SEXP Rmean); // SEXP MY_ppf(SEXP flist, SEXP Rtype); - diff --git a/src/utils.c b/src/utils.c index 13cbf88..7b2fe49 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,30 +1,44 @@ -#define USE_RINTERNALS 1 // needed because SET_REFCNT use in MY_named -#include // needed because SET_REFCNT use in MY_named #include "lfe.h" -SEXP MY_scalecols(SEXP mat, SEXP vec) { - if(!isMatrix(mat)) error("first argument should be a matrix"); + +#define USE_FC_LEN_T +#ifndef FCONE +#define FCONE +#endif + +SEXP MY_scalecols(SEXP mat, SEXP vec) +{ + if (!isMatrix(mat)) + error("first argument should be a matrix"); mybigint_t col = ncols(mat), row = nrows(mat); - if(isMatrix(vec)) { - if(row != nrows(vec)) error("Rows of matrix should be the same as rows of vector"); + if (isMatrix(vec)) + { + if (row != nrows(vec)) + error("Rows of matrix should be the same as rows of vector"); double *cmat = REAL(mat); double *cvec = 
REAL(AS_NUMERIC(vec)); - for(mybigint_t j = 0; j < col; j++) { - double *cc = &cmat[j*row]; - for(mybigint_t i = 0; i < row; i++) { - double tmp = 0.0; - for(int k = 0; k < ncols(vec); k++) - tmp += cc[i]*cvec[i+k*row]; - cc[i] = tmp; + for (mybigint_t j = 0; j < col; j++) + { + double *cc = &cmat[j * row]; + for (mybigint_t i = 0; i < row; i++) + { + double tmp = 0.0; + for (int k = 0; k < ncols(vec); k++) + tmp += cc[i] * cvec[i + k * row]; + cc[i] = tmp; } } - } else { - if(row != LENGTH(vec)) error("length of vector %d is different from number of rows %d",LENGTH(vec),row); + } + else + { + if (row != LENGTH(vec)) + error("length of vector %d is different from number of rows %d", LENGTH(vec), row); double *cmat = REAL(mat); double *cvec = REAL(AS_NUMERIC(vec)); - for(mybigint_t j = 0; j < col; j++) { - double *cc = &cmat[j*row]; - for(mybigint_t i = 0; i < row; i++) - cc[i] *= cvec[i]; + for (mybigint_t j = 0; j < col; j++) + { + double *cc = &cmat[j * row]; + for (mybigint_t i = 0; i < row; i++) + cc[i] *= cvec[i]; } } return mat; @@ -39,75 +53,86 @@ SEXP MY_scalecols(SEXP mat, SEXP vec) { X + t(beta * t(Y)), so this is a utility to save some costly transposes. 
used in cgsolve() */ -SEXP MY_pdaxpy(SEXP inX, SEXP inY, SEXP inbeta) { - mybigint_t col = ncols(inX), row=nrows(inX); - if(col != ncols(inY) || row != nrows(inY)) +SEXP MY_pdaxpy(SEXP inX, SEXP inY, SEXP inbeta) +{ + mybigint_t col = ncols(inX), row = nrows(inX); + if (col != ncols(inY) || row != nrows(inY)) error("X and Y should have the same shape"); - if(LENGTH(inbeta) != col) + if (LENGTH(inbeta) != col) error("beta should have the same length as the number of columns of Y"); double *X = REAL(inX); double *Y = REAL(inY); double *beta = REAL(inbeta); SEXP res; - PROTECT(res = allocMatrix(REALSXP,row,col)); + PROTECT(res = allocMatrix(REALSXP, row, col)); double *pres = REAL(res); - for(mybigint_t j= 0; j < col; j++) { + for (mybigint_t j = 0; j < col; j++) + { double b = beta[j]; - double *out = &pres[j*row]; - double *xin = &X[j*row]; - double *yin = &Y[j*row]; - for(mybigint_t i=0; i < row; i++) { - out[i] = xin[i] + b*yin[i]; + double *out = &pres[j * row]; + double *xin = &X[j * row]; + double *yin = &Y[j * row]; + for (mybigint_t i = 0; i < row; i++) + { + out[i] = xin[i] + b * yin[i]; } } UNPROTECT(1); - return(res); + return (res); } /* compute inner product pairwise of the columns of matrices X and Y This is diag(crossprod(X,Y)). 
*/ -SEXP MY_piproduct(SEXP inX, SEXP inY) { - mybigint_t col = ncols(inX), row=nrows(inX); - if(col != ncols(inY) || row != nrows(inY)) +SEXP MY_piproduct(SEXP inX, SEXP inY) +{ + mybigint_t col = ncols(inX), row = nrows(inX); + if (col != ncols(inY) || row != nrows(inY)) error("X and Y should have the same shape"); double *X = REAL(inX); double *Y = REAL(inY); SEXP res; PROTECT(res = allocVector(REALSXP, col)); double *pres = REAL(res); - for(mybigint_t j= 0; j < col; j++) { - double *xin = &X[j*row]; - double *yin = &Y[j*row]; + for (mybigint_t j = 0; j < col; j++) + { + double *xin = &X[j * row]; + double *yin = &Y[j * row]; pres[j] = 0.0; - for(mybigint_t i= 0; i < row; i++) { - pres[j] += xin[i]*yin[i]; + for (mybigint_t i = 0; i < row; i++) + { + pres[j] += xin[i] * yin[i]; } } UNPROTECT(1); - return(res); + return (res); } // copy-free dimnames<- -SEXP MY_setdimnames(SEXP obj, SEXP nm) { - if(!isNull(obj)) setAttrib(obj, R_DimNamesSymbol, nm); - return(R_NilValue); +SEXP MY_setdimnames(SEXP obj, SEXP nm) +{ + if (!isNull(obj)) + setAttrib(obj, R_DimNamesSymbol, nm); + return (R_NilValue); } /* Compute and return alpha * bread %*% meat %*% bread */ -SEXP MY_sandwich(SEXP inalpha, SEXP inbread, SEXP inmeat) { +SEXP MY_sandwich(SEXP inalpha, SEXP inbread, SEXP inmeat) +{ double alpha = REAL(AS_NUMERIC(inalpha))[0]; - if(!isMatrix(inbread)) error("bread must be a matrix"); - if(!isMatrix(inmeat)) error("bread must be a matrix"); - if(ncols(inbread) != nrows(inbread)) + if (!isMatrix(inbread)) + error("bread must be a matrix"); + if (!isMatrix(inmeat)) + error("bread must be a matrix"); + if (ncols(inbread) != nrows(inbread)) error("bread must be square matrix"); - if(ncols(inmeat) != nrows(inmeat)) + if (ncols(inmeat) != nrows(inmeat)) error("meat must be square matrix"); - if(ncols(inmeat) != ncols(inbread)) + if (ncols(inmeat) != ncols(inbread)) error("bread and meat must have the same size"); int N = ncols(inmeat); @@ -115,92 +140,99 @@ SEXP MY_sandwich(SEXP 
inalpha, SEXP inbread, SEXP inmeat) { double *bread = REAL(inbread); SEXP ret; double *tmp; - tmp = (double*) R_alloc(N*N,sizeof(double)); + tmp = (double *)R_alloc(N * N, sizeof(double)); PROTECT(ret = allocMatrix(REALSXP, N, N)); double *out = REAL(ret); double zero = 0.0; double one = 1.0; - F77_CALL(dsymm)("R","U",&N,&N,&one,bread,&N,meat,&N,&zero,tmp,&N); - F77_CALL(dsymm)("L","U",&N,&N,&alpha,bread,&N,tmp,&N,&zero,out,&N); + F77_CALL(dsymm) + ("R", "U", &N, &N, &one, bread, &N, meat, &N, &zero, tmp, &N FCONE FCONE); + F77_CALL(dsymm) + ("L", "U", &N, &N, &alpha, bread, &N, tmp, &N, &zero, out, &N FCONE FCONE); UNPROTECT(1); return ret; } - // copy-free dsyrk C = beta*C + alpha * A' A -SEXP MY_dsyrk(SEXP inbeta, SEXP inC, SEXP inalpha, SEXP inA) { +SEXP MY_dsyrk(SEXP inbeta, SEXP inC, SEXP inalpha, SEXP inA) +{ double beta = REAL(AS_NUMERIC(inbeta))[0]; double alpha = REAL(AS_NUMERIC(inalpha))[0]; - if(!isMatrix(inC)) error("C must be a matrix"); - if(!isMatrix(inA)) error("A must be a matrix"); + if (!isMatrix(inC)) + error("C must be a matrix"); + if (!isMatrix(inA)) + error("A must be a matrix"); - if(ncols(inC) != nrows(inC)) { - error("C must be a square matrix, it is %d x %d",nrows(inC), ncols(inC)); + if (ncols(inC) != nrows(inC)) + { + error("C must be a square matrix, it is %d x %d", nrows(inC), ncols(inC)); } int N = nrows(inC); double *C = REAL(inC); - if(ncols(inA) != ncols(inC)) { - error("A (%d x %d) must have the same number of columns as C (%d x %d)",nrows(inA),ncols(inA),nrows(inC),nrows(inC)); + if (ncols(inA) != ncols(inC)) + { + error("A (%d x %d) must have the same number of columns as C (%d x %d)", nrows(inA), ncols(inA), nrows(inC), nrows(inC)); } int K = nrows(inA); double *A = REAL(inA); - F77_CALL(dsyrk)("U","T",&N, &K, &alpha, A, &K, &beta, C, &N); + F77_CALL(dsyrk) + ("U", "T", &N, &K, &alpha, A, &K, &beta, C, &N FCONE FCONE); // fill in the lower triangular part - for(mybigint_t row=0; row < N; row++) { - for(mybigint_t col=0; col 
< row; col++) { - C[col*N + row] = C[row*N+col]; + for (mybigint_t row = 0; row < N; row++) + { + for (mybigint_t col = 0; col < row; col++) + { + C[col * N + row] = C[row * N + col]; } } return R_NilValue; } - // debugging memory copy -SEXP MY_address(SEXP x) { +SEXP MY_address(SEXP x) +{ char chr[30]; - sprintf(chr, "adr=%p, named=%d", (void*)x, NAMED(x)); - return(mkString(chr)); + snprintf(chr, sizeof(chr), "adr=%p, named=%d", (void *)x, NAMED(x)); + return (mkString(chr)); } -SEXP MY_named(SEXP x, SEXP n) { - if(isNull(n)) { +SEXP MY_named(SEXP x, SEXP n) +{ + if (isNull(n)) + { // return NAMED status SEXP res = allocVector(INTSXP, 1); PROTECT(res); -#if defined(COMPUTE_REFCNT_VALUES) - INTEGER(res)[0] = REFCNT(x); -#else - INTEGER(res)[0] = NAMED(x); -#endif - setAttrib(res,install("x"),x); + INTEGER(res) + [0] = NAMED(x); + setAttrib(res, install("x"), x); UNPROTECT(1); - return(res); + return (res); } // set named status. Use this at your own peril. Seriously. Things may ... -#if defined(COMPUTE_REFCNT_VALUES) - SET_REFCNT(x,INTEGER(n)[0]); -#else - SET_NAMED(x,INTEGER(n)[0]); -#endif - return(x); + SET_NAMED(x, INTEGER(n)[0]); + return (x); } /* Trickery to check for interrupts when using threads */ -static void chkIntFn(void *dummy) { - if(dummy==NULL){}; //avoid pedantic warning about unused parameter +static void chkIntFn(void *dummy) +{ + if (dummy == NULL) + { + }; // avoid pedantic warning about unused parameter R_CheckUserInterrupt(); } -/* this will call the above in a top-level context so it won't +/* this will call the above in a top-level context so it won't longjmp-out of context */ -int checkInterrupt() { +extern int checkInterrupt(void) +{ return (R_ToplevelExec(chkIntFn, NULL) == 0); } - /* More trickery, we can't printf in threads since the R API is not thread-safe. So set up a message stack. This is pushed in the threads and popped in the main thread. 
*/ @@ -209,37 +241,45 @@ int checkInterrupt() { static char *msgstack[MSGLIM]; static int msgptr; -void initmsg() { +extern void initmsg(void) +{ msgptr = 0; } /* Craft our own strdup, it's not supported everywhere */ -static char *mystrdup(char *s) { - char *sc = (char*)malloc(strlen(s)+1); - if(sc != NULL) strcpy(sc,s); - return(sc); +static char *mystrdup(char *s) +{ + char *sc = (char *)malloc(strlen(s) + 1); + if (sc != NULL) + strcpy(sc, s); + return (sc); } -void pushmsg(char *s, LOCK_T lock) { -#ifdef NOTHREADS +void pushmsg(char *s, LOCK_T lock) +{ +#ifdef NOTHREADS REprintf(s); #else LOCK(lock); - if(msgptr < MSGLIM) { + if (msgptr < MSGLIM) + { msgstack[msgptr++] = mystrdup(s); } UNLOCK(lock); #endif } -void printmsg(LOCK_T lock) { +void printmsg(LOCK_T lock) +{ #ifdef NOTHREADS return; #else char *s; int i; LOCK(lock); - for(i = 0; i < msgptr; i++) { + for (i = 0; i < msgptr; i++) + { s = msgstack[i]; - if(s != NULL) { + if (s != NULL) + { REprintf(s); free(s); } diff --git a/tests/Examples/lfe-Ex.Rout.save b/tests/Examples/lfe-Ex.Rout.save deleted file mode 100644 index b571d86..0000000 --- a/tests/Examples/lfe-Ex.Rout.save +++ /dev/null @@ -1,1373 +0,0 @@ - -R Under development (unstable) (2019-12-10 r77548) -- "Unsuffered Consequences" -Copyright (C) 2019 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) - -R is free software and comes with ABSOLUTELY NO WARRANTY. -You are welcome to redistribute it under certain conditions. -Type 'license()' or 'licence()' for distribution details. - - Natural language support but running in an English locale - -R is a collaborative project with many contributors. -Type 'contributors()' for more information and -'citation()' on how to cite R or R packages in publications. - -Type 'demo()' for some demos, 'help()' for on-line help, or -'help.start()' for an HTML browser interface to help. -Type 'q()' to quit R. 
- -> pkgname <- "lfe" -> source(file.path(R.home("share"), "R", "examples-header.R")) -> options(warn = 1) -> library('lfe') -Loading required package: Matrix -> -> base::assign(".oldSearch", base::search(), pos = 'CheckExEnv') -> base::assign(".old_wd", base::getwd(), pos = 'CheckExEnv') -> cleanEx() -> nameEx("bccorr") -> ### * bccorr -> -> flush(stderr()); flush(stdout()) -> -> ### Name: bccorr -> ### Title: Compute limited mobility bias corrected correlation between -> ### fixed effects -> ### Aliases: bccorr -> -> ### ** Examples -> -> x <- rnorm(500) -> x2 <- rnorm(length(x)) -> -> ## create individual and firm -> id <- factor(sample(40,length(x),replace=TRUE)) -> firm <- factor(sample(30,length(x),replace=TRUE,prob=c(2,rep(1,29)))) -> foo <- factor(sample(20,length(x),replace=TRUE)) -> ## effects -> id.eff <- rnorm(nlevels(id)) -> firm.eff <- rnorm(nlevels(firm)) -> foo.eff <- rnorm(nlevels(foo)) -> ## left hand side -> y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + foo.eff[foo] + rnorm(length(x)) -> -> # make a data frame -> fr <- data.frame(y,x,x2,id,firm,foo) -> ## estimate and print result -> est <- felm(y ~ x+x2|id+firm+foo, data=fr, keepX=TRUE) -> # find bias corrections -> bccorr(est) - corr v1 v2 cov d1 d2 - 0.008569979 0.933723478 1.297425684 0.009432572 0.082334457 0.061969275 - d12 --0.004721002 -> -> -> -> cleanEx() -> nameEx("btrap") -> ### * btrap -> -> flush(stderr()); flush(stdout()) -> -> ### Name: btrap -> ### Title: Bootstrap standard errors for the group fixed effects -> ### Aliases: btrap -> -> ### ** Examples -> -> -> oldopts <- options(lfe.threads=2) -> ## create covariates -> x <- rnorm(3000) -> x2 <- rnorm(length(x)) -> -> ## create individual and firm -> id <- factor(sample(700,length(x),replace=TRUE)) -> firm <- factor(sample(300,length(x),replace=TRUE)) -> -> ## effects -> id.eff <- rlnorm(nlevels(id)) -> firm.eff <- rexp(nlevels(firm)) -> -> ## left hand side -> y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) 
-> -> ## estimate and print result -> est <- felm(y ~ x+x2 | id + firm) -> summary(est) - -Call: - felm(formula = y ~ x + x2 | id + firm) - -Residuals: - Min 1Q Median 3Q Max --2.5692 -0.5356 0.0000 0.5265 3.0282 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -x 0.98931 0.02125 46.55 <2e-16 *** -x2 0.25710 0.02188 11.75 <2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.981 on 2006 degrees of freedom -Multiple R-squared(full model): 0.8987 Adjusted R-squared: 0.8485 -Multiple R-squared(proj model): 0.5385 Adjusted R-squared: 0.3101 -F-statistic(full model):17.92 on 993 and 2006 DF, p-value: < 2.2e-16 -F-statistic(proj model): 1170 on 2 and 2006 DF, p-value: < 2.2e-16 - - -> ## extract the group effects -> alpha <- getfe(est) -> head(alpha) - effect obs comp fe idx -id.1 0.7558736 6 1 id 1 -id.2 -0.7764543 1 1 id 2 -id.3 0.9832472 7 1 id 3 -id.4 2.7358165 4 1 id 4 -id.5 0.9211335 4 1 id 5 -id.6 3.8248845 2 1 id 6 -> ## bootstrap standard errors -> head(btrap(alpha,est)) - effect obs comp fe idx se -id.1 0.7558736 6 1 id 1 0.4565457 -id.2 -0.7764543 1 1 id 2 0.8725093 -id.3 0.9832472 7 1 id 3 0.3565344 -id.4 2.7358165 4 1 id 4 0.4586499 -id.5 0.9211335 4 1 id 5 0.4740637 -id.6 3.8248845 2 1 id 6 0.6129610 -> -> ## bootstrap some differences -> ef <- function(v,addnames) { -+ w <- c(v[2]-v[1],v[3]-v[2],v[3]-v[1]) -+ if(addnames) { -+ names(w) <-c('id2-id1','id3-id2','id3-id1') -+ attr(w,'extra') <- list(note=c('line1','line2','line3')) -+ } -+ w -+ } -> # check that it's estimable -> is.estimable(ef,est$fe) -[1] TRUE -> -> head(btrap(alpha,est,ef=ef)) - effect note se -id2-id1 -1.5323279 line1 0.8175766 -id3-id2 1.7597015 line2 0.8622066 -id3-id1 0.2273736 line3 0.4678413 -> options(oldopts) -> -> -> -> -> cleanEx() -> nameEx("cgsolve") -> ### * cgsolve -> -> flush(stderr()); flush(stdout()) -> -> ### Name: cgsolve -> ### Title: Solve a symmetric linear system with the conjugate gradient -> ### method 
-> ### Aliases: cgsolve -> -> ### ** Examples -> -> -> N <- 100000 -> # create some factors -> f1 <- factor(sample(34000,N,replace=TRUE)) -> f2 <- factor(sample(25000,N,replace=TRUE)) -> # a matrix of dummies, which probably is rank deficient -> B <- makeDmatrix(list(f1,f2)) -> dim(B) -[1] 100000 56753 -> # create a right hand side -> b <- as.matrix(B %*% rnorm(ncol(B))) -> # solve B' B x = B' b -> sol <- cgsolve(crossprod(B), crossprod(B, b), eps=-1e-2) -> #verify solution -> sqrt(sum((B %*% sol - b)^2)) -[1] 0.0007802951 -> -> -> -> -> cleanEx() -> nameEx("chainsubset") -> ### * chainsubset -> -> flush(stderr()); flush(stdout()) -> -> ### Name: chainsubset -> ### Title: Chain subset conditions -> ### Aliases: chainsubset -> -> ### ** Examples -> -> set.seed(48) -> N <- 10000 -> dat <- data.frame(y=rnorm(N), x=rnorm(N)) -> # It's not the same as and'ing the conditions: -> felm(y ~ x,data=dat,subset=chainsubset(x < mean(y), y < 2*mean(x))) -(Intercept) x - -2.058171 -0.002087 -> felm(y ~ x,data=dat,subset=chainsubset(y < 2*mean(x), x < mean(y))) -(Intercept) x - -0.74578 0.03168 -> felm(y ~ x,data=dat,subset=(x < mean(y)) & (y < 2*mean(x))) -(Intercept) x - -0.791573 0.007835 -> lm(y ~ x, data=dat, subset=chainsubset(x < mean(y), x > mean(y))) - -Call: -lm(formula = y ~ x, data = dat, subset = chainsubset(x < mean(y), - x > mean(y))) - -Coefficients: -(Intercept) x - 0.0586 -14.1016 - -> -> -> -> cleanEx() -> nameEx("compfactor") -> ### * compfactor -> -> flush(stderr()); flush(stdout()) -> -> ### Name: compfactor -> ### Title: Find the connected components -> ### Aliases: compfactor -> -> ### ** Examples -> -> -> ## create two factors -> f1 <- factor(sample(300,400,replace=TRUE)) -> f2 <- factor(sample(300,400,replace=TRUE)) -> -> ## find the components -> cf <- compfactor(list(f1=f1,f2=f2)) -> -> ## show the third largest component -> fr <- data.frame(f1,f2,cf) -> fr[cf==3,] - f1 f2 cf -7 277 111 3 -28 198 115 3 -52 141 65 3 -55 108 219 3 -204 108 65 3 -246 141 
264 3 -278 108 115 3 -301 277 264 3 -325 141 185 3 -352 109 65 3 -396 277 63 3 -> -> -> -> -> cleanEx() -> nameEx("condfstat") -> ### * condfstat -> -> flush(stderr()); flush(stdout()) -> -> ### Name: condfstat -> ### Title: Compute conditional F statistic for weak instruments in an -> ### IV-estimation with multiple endogenous variables -> ### Aliases: condfstat -> -> ### ** Examples -> -> -> z1 <- rnorm(4000) -> z2 <- rnorm(length(z1)) -> u <- rnorm(length(z1)) -> # make x1, x2 correlated with errors u -> -> x1 <- z1 + z2 + 0.2*u + rnorm(length(z1)) -> x2 <- z1 + 0.94*z2 - 0.3*u + rnorm(length(z1)) -> y <- x1 + x2 + u -> est <- felm(y ~ 1 | 0 | (x1 | x2 ~ z1 + z2)) -> summary(est) - -Call: - felm(formula = y ~ 1 | 0 | (x1 | x2 ~ z1 + z2)) - -Residuals: - Min 1Q Median 3Q Max --4.3972 -0.7110 -0.0092 0.7191 3.5336 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) -0.01607 0.01962 -0.819 0.412825 -`x1(fit)` 0.90461 0.24097 3.754 0.000177 *** -`x2(fit)` 1.11122 0.25643 4.333 1.5e-05 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 1.047 on 3997 degrees of freedom -Multiple R-squared(full model): 0.8992 Adjusted R-squared: 0.8992 -Multiple R-squared(proj model): 0.8992 Adjusted R-squared: 0.8992 -F-statistic(full model):1.479e+04 on 2 and 3997 DF, p-value: < 2.2e-16 -F-statistic(proj model): 1.479e+04 on 2 and 3997 DF, p-value: < 2.2e-16 -F-statistic(endog. vars):1.479e+04 on 2 and 3997 DF, p-value: < 2.2e-16 - - -> ## Not run: -> ##D summary(est$stage1, lhs='x1') -> ##D summary(est$stage1, lhs='x2') -> ## End(Not run) -> -> # the joint significance of the instruments in both the first stages are ok: -> t(sapply(est$stage1$lhs, function(lh) waldtest(est$stage1, ~z1|z2, lhs=lh))) - p chi2 df1 p.F F df2 -x1 0 8416.522 2 0 4208.261 3997 -x2 0 6889.635 2 0 3444.817 3997 -> # everything above looks fine, t-tests for instruments, -> # as well as F-tests for excluded instruments in the 1st stages. 
-> # The conditional F-test reveals that the instruments are jointly weak -> # (it's close to being only one instrument, z1+z2, for both x1 and x2) -> condfstat(est, quantiles=c(0.05, 0.95)) - x1 x2 -iid F 8.023097 8.021403 -attr(,"df1") -[1] 1 -attr(,"quantiles") - 5% 95% -x1 0.4076302 1.238373 -x2 0.7415684 1.635039 -attr(,"quantiles")attr(,"q") -[1] 0.05 0.95 -attr(,"quantiles")attr(,"samples") -[1] 100 -> -> -> -> -> cleanEx() -> nameEx("demeanlist") -> ### * demeanlist -> -> flush(stderr()); flush(stdout()) -> -> ### Name: demeanlist -> ### Title: Centre vectors on multiple groups -> ### Aliases: demeanlist -> -> ### ** Examples -> -> oldopts <- options(lfe.threads=1) -> ## create a matrix -> mtx <- data.frame(matrix(rnorm(999),ncol=3)) -> # a list of factors -> rgb <- c('red','green','blue') -> fl <- replicate(4, factor(sample(rgb,nrow(mtx),replace=TRUE)), simplify=FALSE) -> names(fl) <- paste('g',seq_along(fl),sep='') -> # centre on all means -> mtx0 <- demeanlist(mtx,fl) -> head(data.frame(mtx0,fl)) - X1 X2 X3 g1 g2 g3 g4 -1 -0.64196076 1.4456118 -1.3025993 red red green blue -2 -0.06326532 -0.2158289 0.6045219 green green green red -3 -0.40641864 0.7223192 0.2344265 blue blue blue green -4 1.40753491 0.8459852 2.1454416 blue red red blue -5 0.09988421 -0.2905892 -1.0429431 blue blue red blue -6 -0.75980638 -0.1095204 1.1490805 blue green green green -> # verify that the group means for the columns are zero -> lapply(fl, function(f) apply(mtx0,2,tapply,f,mean)) -$g1 - X1 X2 X3 -blue 4.029092e-12 -4.113887e-13 -3.656474e-10 -green 3.004903e-12 6.395388e-14 3.751943e-10 -red -7.680146e-12 3.528239e-13 -5.681341e-11 - -$g2 - X1 X2 X3 -blue -4.816087e-18 6.232643e-18 2.922735e-17 -green -2.429030e-18 7.222454e-18 -2.173773e-17 -red 6.679396e-18 5.807147e-18 1.077004e-17 - -$g3 - X1 X2 X3 -blue -2.727461e-11 7.652167e-13 3.084120e-10 -green -3.646924e-11 -4.633995e-14 -3.716751e-10 -red 6.153743e-11 -5.825537e-13 1.427556e-10 - -$g4 - X1 X2 X3 -blue 2.109630e-13 
-4.238365e-13 1.771802e-10 -green -1.727496e-13 1.232453e-13 1.812780e-11 -red -1.337501e-14 2.238489e-13 -1.548618e-10 - -> options(oldopts) -> -> -> -> cleanEx() -> nameEx("efactory") -> ### * efactory -> -> flush(stderr()); flush(stdout()) -> -> ### Name: efactory -> ### Title: Create estimable function -> ### Aliases: efactory -> -> ### ** Examples -> -> -> oldopts <- options(lfe.threads=1) -> id <- factor(sample(5000,50000,replace=TRUE)) -> firm <- factor(sample(3000,50000,replace=TRUE)) -> fl <- list(id=id,firm=firm) -> obj <- list(fe=fl,cfactor=compfactor(fl)) -> ## the trivial least-norm transformtion, which by the way is non-estimable -> print(ef <- efactory(obj,'ln')) -function (v, addnames) -{ - if (addnames) { - names(v) <- nm - attr(v, "extra") <- list(obs = obs, comp = comp, fe = fef, - idx = idx) - } - v -} - - -> is.estimable(ef,fl) -Warning in is.estimable(ef, fl) : - non-estimable function, largest error 0.003 in coordinate 1050 ("id.1050") -[1] FALSE -> ## then the default -> print(ef <- efactory(obj,'ref')) -function (v, addnames) -{ - esum <- sum(v[extrarefs]) - df <- v[refsubs] - sub <- ifelse(is.na(df), 0, df) - df <- v[refsuba] - add <- ifelse(is.na(df), 0, df + esum) - v <- v - sub + add - if (addnames) { - names(v) <- nm - attr(v, "extra") <- list(obs = obs, comp = comp, fe = fef, - idx = idx) - } - v -} - - -attr(,"verified") -[1] TRUE -> is.estimable(ef,fl) -[1] TRUE -> # get the names of the coefficients, i.e. 
the nm-variable in the function -> head(evalq(nm,environment(ef))) -[1] "id.1" "id.2" "id.3" "id.4" "id.5" "id.6" -> options(oldopts) -> -> -> -> -> cleanEx() -> nameEx("felm") -> ### * felm -> -> flush(stderr()); flush(stdout()) -> -> ### Name: felm -> ### Title: Fit a linear model with multiple group fixed effects -> ### Aliases: felm -> -> ### ** Examples -> -> -> oldopts <- options(lfe.threads=1) -> -> ## Simulate data -> -> # Covariates -> x <- rnorm(1000) -> x2 <- rnorm(length(x)) -> # Individuals and firms -> id <- factor(sample(20,length(x),replace=TRUE)) -> firm <- factor(sample(13,length(x),replace=TRUE)) -> # Effects for them -> id.eff <- rnorm(nlevels(id)) -> firm.eff <- rnorm(nlevels(firm)) -> # Left hand side -> u <- rnorm(length(x)) -> y <- x + 0.5*x2 + id.eff[id] + firm.eff[firm] + u -> -> ## Estimate the model and print the results -> est <- felm(y ~ x + x2 | id + firm) -> summary(est) - -Call: - felm(formula = y ~ x + x2 | id + firm) - -Residuals: - Min 1Q Median 3Q Max --3.4329 -0.6925 -0.0261 0.6946 2.7795 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -x 0.96790 0.03120 31.02 <2e-16 *** -x2 0.48603 0.03123 15.56 <2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 1.009 on 966 degrees of freedom -Multiple R-squared(full model): 0.7456 Adjusted R-squared: 0.7369 -Multiple R-squared(proj model): 0.5549 Adjusted R-squared: 0.5397 -F-statistic(full model):85.78 on 33 and 966 DF, p-value: < 2.2e-16 -F-statistic(proj model): 602.1 on 2 and 966 DF, p-value: < 2.2e-16 - - -> -> ## Not run: -> ##D # Compare with lm -> ##D summary(lm(y ~ x + x2 + id + firm-1)) -> ## End(Not run) -> -> ## Example with 'reverse causation' (IV regression) -> -> # Q and W are instrumented by x3 and the factor x4. 
-> x3 <- rnorm(length(x)) -> x4 <- sample(12,length(x),replace=TRUE) -> Q <- 0.3*x3 + x + 0.2*x2 + id.eff[id] + 0.3*log(x4) - 0.3*y + rnorm(length(x),sd=0.3) -> W <- 0.7*x3 - 2*x + 0.1*x2 - 0.7*id.eff[id] + 0.8*cos(x4) - 0.2*y+ rnorm(length(x),sd=0.6) -> # Add them to the outcome variable -> y <- y + Q + W -> -> ## Estimate the IV model and report robust SEs -> ivest <- felm(y ~ x + x2 | id + firm | (Q|W ~ x3 + factor(x4))) -> summary(ivest, robust=TRUE) - -Call: - felm(formula = y ~ x + x2 | id + firm | (Q | W ~ x3 + factor(x4))) - -Residuals: - Min 1Q Median 3Q Max --3.4927 -0.6913 -0.0241 0.6976 2.7567 - -Coefficients: - Estimate Robust s.e t value Pr(>|t|) -x 0.89435 0.16419 5.447 6.5e-08 *** -x2 0.48430 0.03160 15.326 < 2e-16 *** -`Q(fit)` 1.03746 0.10833 9.577 < 2e-16 *** -`W(fit)` 0.97835 0.04679 20.908 < 2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 1.017 on 964 degrees of freedom -Multiple R-squared(full model): 0.6976 Adjusted R-squared: 0.6867 -Multiple R-squared(proj model): 0.6043 Adjusted R-squared: 0.59 -F-statistic(full model, *iid*): 71.4 on 35 and 964 DF, p-value: < 2.2e-16 -F-statistic(proj model): 460.9 on 4 and 964 DF, p-value: < 2.2e-16 -F-statistic(endog. 
vars):681.6 on 2 and 964 DF, p-value: < 2.2e-16 - - -> condfstat(ivest) - Q W -iid F 40.07999 66.90775 -attr(,"df1") -[1] 11 -> -> ## Not run: -> ##D # Compare with the not instrumented fit: -> ##D summary(felm(y ~ x + x2 + Q + W | id + firm)) -> ## End(Not run) -> -> ## Example with multiway clustering -> -> # Create a large cluster group (500 clusters) and a small one (20 clusters) -> cl1 <- factor(sample(rep(1:500, length.out=length(x)))) -> cl2 <- factor(sample(rep(1:20, length.out=length(x)))) -> # Function for adding clustered noise to our outcome variable -> cl_noise <- function(cl) { -+ obs_per_cluster <- length(x)/nlevels(cl) -+ unlist(replicate(nlevels(cl), rnorm(obs_per_cluster, mean=rnorm(1), sd=runif(1)), simplify=FALSE)) -+ } -> # New outcome variable -> y_cl <- x + 0.5*x2 + id.eff[id] + firm.eff[firm] + cl_noise(cl1) + cl_noise(cl2) -> -> ## Estimate and print the model with cluster-robust SEs (default) -> est_cl <- felm(y_cl ~ x + x2 | id + firm | 0 | cl1 + cl2) -> summary(est_cl) - -Call: - felm(formula = y_cl ~ x + x2 | id + firm | 0 | cl1 + cl2) - -Residuals: - Min 1Q Median 3Q Max --4.8756 -0.9697 0.0232 1.0041 5.2598 - -Coefficients: - Estimate Cluster s.e. t value Pr(>|t|) -x 0.97330 0.05007 19.44 <2e-16 *** -x2 0.53359 0.03043 17.54 <2e-16 *** ---- -Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 1.528 on 966 degrees of freedom -Multiple R-squared(full model): 0.5746 Adjusted R-squared: 0.5601 -Multiple R-squared(proj model): 0.3633 Adjusted R-squared: 0.3415 -F-statistic(full model, *iid*):39.54 on 33 and 966 DF, p-value: < 2.2e-16 -F-statistic(proj model): 252.8 on 2 and 499 DF, p-value: < 2.2e-16 - - -> -> ## Not run: -> ##D # Print ordinary standard errors: -> ##D summary(est_cl, robust = FALSE) -> ##D # Match cluster-robust SEs from Stata's reghdfe package: -> ##D summary(felm(y_cl ~ x + x2 | id + firm | 0 | cl1 + cl2, cmethod="reghdfe")) -> ## End(Not run) -> -> options(oldopts) -> -> -> -> -> cleanEx() -> nameEx("fevcov") -> ### * fevcov -> -> flush(stderr()); flush(stdout()) -> -> ### Name: fevcov -> ### Title: Compute limited mobility bias corrected covariance matrix -> ### between fixed effects -> ### Aliases: fevcov -> -> ### ** Examples -> -> -> x <- rnorm(5000) -> x2 <- rnorm(length(x)) -> -> ## create individual and firm -> id <- factor(sample(40,length(x),replace=TRUE)) -> firm <- factor(sample(30,length(x),replace=TRUE,prob=c(2,rep(1,29)))) -> foo <- factor(sample(20,length(x),replace=TRUE)) -> ## effects -> id.eff <- rnorm(nlevels(id)) -> firm.eff <- runif(nlevels(firm)) -> foo.eff <- rchisq(nlevels(foo),df=1) -> ## left hand side -> id.m <- id.eff[id] -> firm.m <- firm.eff[firm] -> foo.m <- foo.eff[foo] -> # normalize them -> id.m <- id.m/sd(id.m) -> firm.m <- firm.m/sd(firm.m) -> foo.m <- foo.m/sd(foo.m) -> y <- x + 0.25*x2 + id.m + firm.m + foo.m + rnorm(length(x),sd=2) -> z <- x + 0.5*x2 + 0.7*id.m + 0.5*firm.m + 0.3*foo.m + rnorm(length(x),sd=2) -> # make a data frame -> fr <- data.frame(y,z,x,x2,id,firm,foo) -> ## estimate and print result -> est <- felm(y|z ~ x+x2|id+firm+foo, data=fr, keepX=TRUE) -> # find bias corrections, there's little bias in this example -> print(yv <- fevcov(est, lhs='y')) - id firm foo -id 1.031163938 0.006729247 -0.006303323 
-firm 0.006729247 1.046116404 0.010550689 -foo -0.006303323 0.010550689 1.058185345 -attr(,"bias") - id firm foo -id 0.0307580151 -0.0002094436 -0.0001106874 -firm -0.0002094436 0.0224115429 -0.0001244024 -foo -0.0001106874 -0.0001244024 0.0132579840 -> ## Here's how to compute the unbiased correlation matrix: -> cm <- cov2cor(yv) -> structure(cm,bias=NULL) - id firm foo -id 1.000000000 0.006479069 -0.006034272 -firm 0.006479069 1.000000000 0.010027899 -foo -0.006034272 0.010027899 1.000000000 -> -> -> -> -> cleanEx() -> nameEx("fixedse") -> ### * fixedse -> -> flush(stderr()); flush(stdout()) -> -> ### Name: fixedse -> ### Title: Compute standard errors for fixed effects -> ### Aliases: fixedse -> ### Keywords: internal -> -> ### ** Examples -> -> x <- rnorm(1000) -> f <- factor(sample(5,1000,replace=TRUE)) -> y <- x + (1:5)[f] + rnorm(1000) -> est <- felm(y ~ x | f, keepX=TRUE) -> #both bootstrap and computed se: -> cbind(getfe(est,ef=efactory(est,'ref'),se=TRUE), fse=fixedse(est)) - effect obs comp fe idx se fse -f.1 0.9569311 185 1 f 1 0.07807322 0.07535548 -f.2 1.9949066 223 1 f 2 0.06163607 0.06860181 -f.3 3.0326896 185 1 f 3 0.07650146 0.07532290 -f.4 4.0294033 196 1 f 4 0.07595171 0.07317074 -f.5 4.9413841 211 1 f 5 0.07077708 0.07054858 -> #compare with lm: -> summary(lm(y ~x+f-1)) - -Call: -lm(formula = y ~ x + f - 1) - -Residuals: - Min 1Q Median 3Q Max --3.5561 -0.6437 -0.0299 0.7166 2.9214 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -x 1.01522 0.03135 32.38 <2e-16 *** -f1 0.95693 0.07536 12.70 <2e-16 *** -f2 1.99491 0.06860 29.08 <2e-16 *** -f3 3.03269 0.07532 40.26 <2e-16 *** -f4 4.02940 0.07317 55.07 <2e-16 *** -f5 4.94138 0.07055 70.04 <2e-16 *** ---- -Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 1.024 on 994 degrees of freedom -Multiple R-squared: 0.9216, Adjusted R-squared: 0.9211 -F-statistic: 1947 on 6 and 994 DF, p-value: < 2.2e-16 - -> -> -> -> cleanEx() -> nameEx("getfe") -> ### * getfe -> -> flush(stderr()); flush(stdout()) -> -> ### Name: getfe -> ### Title: Retrieve the group fixed effects -> ### Aliases: getfe -> ### Keywords: models regression -> -> ### ** Examples -> -> -> oldopts <- options(lfe.threads=2) -> ## create covariates -> x <- rnorm(4000) -> x2 <- rnorm(length(x)) -> -> ## create individual and firm -> id <- factor(sample(500,length(x),replace=TRUE)) -> firm <- factor(sample(300,length(x),replace=TRUE)) -> -> ## effects -> id.eff <- rlnorm(nlevels(id)) -> firm.eff <- rexp(nlevels(firm)) -> -> ## left hand side -> y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + rnorm(length(x)) -> -> ## estimate and print result -> est <- felm(y ~ x+x2 | id + firm) -> summary(est) - -Call: - felm(formula = y ~ x + x2 | id + firm) - -Residuals: - Min 1Q Median 3Q Max --2.94796 -0.59482 -0.01038 0.59008 3.15936 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -x 1.01409 0.01687 60.11 <2e-16 *** -x2 0.26766 0.01752 15.28 <2e-16 *** ---- -Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.9892 on 3200 degrees of freedom -Multiple R-squared(full model): 0.9149 Adjusted R-squared: 0.8937 -Multiple R-squared(proj model): 0.5457 Adjusted R-squared: 0.4323 -F-statistic(full model):43.06 on 799 and 3200 DF, p-value: < 2.2e-16 -F-statistic(proj model): 1922 on 2 and 3200 DF, p-value: < 2.2e-16 - - -> ## extract the group effects -> alpha <- getfe(est,se=TRUE) -> -> ## find some estimable functions, with standard errors, we don't get -> ## names so we must precompute some numerical indices in ef -> idx <- match(c('id.5','id.6','firm.11','firm.12'),rownames(alpha)) -> alpha[idx,] - effect obs comp fe idx se -id.5 2.8685887 6 1 id 5 0.4564400 -id.6 2.1252963 7 1 id 6 0.4294043 -firm.11 0.2437394 18 1 firm 11 0.2809155 -firm.12 0.2925972 14 1 firm 12 0.3584084 -> ef <- function(v,addnames) { -+ w <- c(v[idx[[2]]]-v[idx[[1]]],v[idx[[4]]]+v[idx[[1]]], -+ v[idx[[4]]]-v[idx[[3]]]) -+ if(addnames) names(w) <-c('id6-id5','f12+id5','f12-f11') -+ w -+ } -> getfe(est,ef=ef,se=TRUE) - effect se -id6-id5 -0.74329244 0.5178066 -f12+id5 3.16118596 0.4907870 -f12-f11 0.04885782 0.3275784 -> options(oldopts) -> ## Not run: -> ##D summary(lm(y ~ x+x2+id+firm-1)) -> ## End(Not run) -> -> -> -> -> cleanEx() -> nameEx("is.estimable") -> ### * is.estimable -> -> flush(stderr()); flush(stdout()) -> -> ### Name: is.estimable -> ### Title: Verify estimability of function -> ### Aliases: is.estimable -> -> ### ** Examples -> -> -> oldopts <- options(lfe.threads=1) -> ## create individual and firm -> id <- factor(sample(5000,50000,replace=TRUE)) -> firm <- factor(sample(3000,50000,replace=TRUE)) -> -> ## create some estimable functions. 
It's faster to -> ## use numerical indices in ef rather than strings, and the input v -> ## to ef has no names, we have to add them when requested -> ef <- function(v,addnames) { -+ w <- c(v[6]-v[5],v[7000]+v[5],v[7000]-v[6000]) -+ if(addnames) names(w) <-c('id6-id5','f2k+id5','f2k-f1k') -+ w -+ } -> is.estimable(ef,list(id=id,firm=firm)) -[1] TRUE -> -> ## Then make an error; in the last coordinate, sum two firms -> ef <- function(v,addnames) { -+ w <- c(v[6]-v[5],v[7000]+v[5],v[7000]+v[6000]) -+ if(addnames) names(w) <-c('id6-id5','f2k+id5','f2k-f1k') -+ w -+ } -> is.estimable(ef, list(id=id,firm=firm), keepdiff=TRUE) -Warning in is.estimable(ef, list(id = id, firm = firm), keepdiff = TRUE) : - non-estimable function, largest error 0.01 in coordinate 3 ("f2k-f1k") -[1] FALSE -attr(,"diff") - id6-id5 f2k+id5 f2k-f1k -2.797201e-11 1.212475e-11 1.436988e-02 -> options(oldopts) -> -> -> -> -> cleanEx() -> nameEx("kaczmarz") -> ### * kaczmarz -> -> flush(stderr()); flush(stdout()) -> -> ### Name: kaczmarz -> ### Title: Solve a linear system defined by factors -> ### Aliases: kaczmarz -> -> ### ** Examples -> -> -> ## create factors -> f1 <- factor(sample(24000,100000,replace=TRUE)) -> f2 <- factor(sample(20000,length(f1),replace=TRUE)) -> f3 <- factor(sample(10000,length(f1),replace=TRUE)) -> f4 <- factor(sample(8000,length(f1),replace=TRUE)) -> ## the matrix of dummies -> D <- makeDmatrix(list(f1,f2,f3,f4)) -> dim(D) -[1] 100000 61499 -> ## an x -> truex <- runif(ncol(D)) -> ## and the right hand side -> R <- as.vector(D %*% truex) -> ## solve it -> sol <- kaczmarz(list(f1,f2,f3,f4),R) -> ## verify that the solution solves the system Dx = R -> sqrt(sum((D %*% sol - R)^2)) -[1] 3.600146e-07 -> ## but the solution is not equal to the true x, because the system is -> ## underdetermined -> sqrt(sum((sol - truex)^2)) -[1] 52.45481 -> ## moreover, the solution from kaczmarz has smaller norm -> sqrt(sum(sol^2)) < sqrt(sum(truex^2)) -[1] TRUE -> -> -> -> -> cleanEx() -> 
nameEx("lfe-package") -> ### * lfe-package -> -> flush(stderr()); flush(stdout()) -> -> ### Name: lfe-package -> ### Title: Overview. Linear Group Fixed Effects -> ### Aliases: lfe-package lfe -> ### Keywords: models regression -> -> ### ** Examples -> -> -> oldopts <- options(lfe.threads=1) -> x <- rnorm(1000) -> x2 <- rnorm(length(x)) -> id <- factor(sample(10,length(x),replace=TRUE)) -> firm <- factor(sample(3,length(x),replace=TRUE,prob=c(2,1.5,1))) -> year <- factor(sample(10,length(x),replace=TRUE,prob=c(2,1.5,rep(1,8)))) -> id.eff <- rnorm(nlevels(id)) -> firm.eff <- rnorm(nlevels(firm)) -> year.eff <- rnorm(nlevels(year)) -> y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + -+ year.eff[year] + rnorm(length(x)) -> est <- felm(y ~ x+x2 | id + firm + year) -> summary(est) - -Call: - felm(formula = y ~ x + x2 | id + firm + year) - -Residuals: - Min 1Q Median 3Q Max --3.4685 -0.6450 -0.0293 0.6631 2.8981 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -x 0.91611 0.03103 29.520 < 2e-16 *** -x2 0.24114 0.03095 7.792 1.68e-14 *** ---- -Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 1.008 on 977 degrees of freedom -Multiple R-squared(full model): 0.7621 Adjusted R-squared: 0.7567 -Multiple R-squared(proj model): 0.4893 Adjusted R-squared: 0.4778 -F-statistic(full model):142.3 on 22 and 977 DF, p-value: < 2.2e-16 -F-statistic(proj model): 468.1 on 2 and 977 DF, p-value: < 2.2e-16 -*** Standard errors may be too high due to more than 2 groups and exactDOF=FALSE - - -> -> getfe(est,se=TRUE) - effect obs comp fe idx se -id.1 -0.25923494 88 1 id 1 0.13632132 -id.2 1.06062345 90 1 id 2 0.14546483 -id.3 0.06034638 90 1 id 3 0.13201034 -id.4 0.44271187 109 1 id 4 0.13256707 -id.5 0.51696693 98 1 id 5 0.13117504 -id.6 1.43021599 108 1 id 6 0.14482875 -id.7 0.23001980 100 1 id 7 0.13899292 -id.8 1.07498270 112 1 id 8 0.13169287 -id.9 -0.38411917 97 1 id 9 0.14582033 -id.10 1.62621598 108 1 id 10 0.12815771 -firm.1 0.00000000 415 1 firm 1 0.00000000 -firm.2 -0.15451360 359 1 firm 2 0.07219465 -firm.3 2.59917570 226 1 firm 3 0.08069538 -year.1 0.00000000 166 2 year 1 0.00000000 -year.2 -1.90629543 144 2 year 2 0.11552284 -year.3 -0.72206632 90 2 year 3 0.13077982 -year.4 -1.34267982 89 2 year 4 0.13507944 -year.5 -1.84370897 87 2 year 5 0.13840235 -year.6 0.39746394 81 2 year 6 0.13503649 -year.7 -0.62958841 82 2 year 7 0.15321151 -year.8 -0.33821454 72 2 year 8 0.15098197 -year.9 -1.77057443 90 2 year 9 0.12325007 -year.10 0.15294690 99 2 year 10 0.12299566 -> # compare with an ordinary lm -> summary(lm(y ~ x+x2+id+firm+year-1)) - -Call: -lm(formula = y ~ x + x2 + id + firm + year - 1) - -Residuals: - Min 1Q Median 3Q Max --3.4685 -0.6450 -0.0293 0.6631 2.8981 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -x 0.91611 0.03103 29.520 < 2e-16 *** -x2 0.24114 0.03095 7.792 1.68e-14 *** -id1 -0.25923 0.13589 -1.908 0.056720 . 
-id2 1.06062 0.13104 8.094 1.71e-15 *** -id3 0.06035 0.13485 0.448 0.654599 -id4 0.44271 0.12720 3.480 0.000523 *** -id5 0.51697 0.13192 3.919 9.52e-05 *** -id6 1.43022 0.12637 11.318 < 2e-16 *** -id7 0.23002 0.13224 1.739 0.082278 . -id8 1.07498 0.12360 8.697 < 2e-16 *** -id9 -0.38412 0.13391 -2.869 0.004213 ** -id10 1.62622 0.12621 12.885 < 2e-16 *** -firm2 -0.15451 0.07361 -2.099 0.036071 * -firm3 2.59918 0.08399 30.945 < 2e-16 *** -year2 -1.90630 0.11514 -16.557 < 2e-16 *** -year3 -0.72207 0.13338 -5.414 7.78e-08 *** -year4 -1.34268 0.13293 -10.101 < 2e-16 *** -year5 -1.84371 0.13476 -13.682 < 2e-16 *** -year6 0.39746 0.13803 2.880 0.004070 ** -year7 -0.62959 0.13669 -4.606 4.65e-06 *** -year8 -0.33821 0.14309 -2.364 0.018290 * -year9 -1.77057 0.13274 -13.339 < 2e-16 *** -year10 0.15295 0.12844 1.191 0.234006 ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 1.008 on 977 degrees of freedom -Multiple R-squared: 0.7679, Adjusted R-squared: 0.7625 -F-statistic: 140.6 on 23 and 977 DF, p-value: < 2.2e-16 - -> options(oldopts) -> -> -> -> -> cleanEx() -> nameEx("makeDmatrix") -> ### * makeDmatrix -> -> flush(stderr()); flush(stdout()) -> -> ### Name: makeDmatrix -> ### Title: Make sparse matrix of dummies from factor list -> ### Aliases: makeDmatrix -> -> ### ** Examples -> -> -> fl <- lapply(1:3, function(i) factor(sample(3,10,replace=TRUE))) -> fl -[[1]] - [1] 1 3 1 2 1 3 3 2 2 3 -Levels: 1 2 3 - -[[2]] - [1] 3 1 1 1 2 2 2 2 3 1 -Levels: 1 2 3 - -[[3]] - [1] 3 1 1 1 1 2 1 1 2 2 -Levels: 1 2 3 - -> makeDmatrix(fl, weights=seq(0.1,1,0.1)) -10 x 9 sparse Matrix of class "dgCMatrix" - f1.1 f1.2 f1.3 f2.1 f2.2 f2.3 f3.1 f3.2 f3.3 - [1,] 0.1 . . . . 0.1 . . 0.1 - [2,] . . 0.2 0.2 . . 0.2 . . - [3,] 0.3 . . 0.3 . . 0.3 . . - [4,] . 0.4 . 0.4 . . 0.4 . . - [5,] 0.5 . . . 0.5 . 0.5 . . - [6,] . . 0.6 . 0.6 . . 0.6 . - [7,] . . 0.7 . 0.7 . 0.7 . . - [8,] . 0.8 . . 0.8 . 0.8 . . - [9,] . 0.9 . . . 0.9 . 0.9 . -[10,] . . 1.0 1.0 . 
. . 1.0 . -> -> -> -> -> cleanEx() -> nameEx("mctrace") -> ### * mctrace -> -> flush(stderr()); flush(stdout()) -> -> ### Name: mctrace -> ### Title: Compute trace of a large matrix by sample means -> ### Aliases: mctrace -> -> ### ** Examples -> -> -> A <- matrix(rnorm(25),5) -> fun <- function(x) A %*% x -> sum(diag(A)) -[1] 0.6807816 -> sum(eigen(A,only.values=TRUE)$values) -[1] 0.6807816+0i -> # mctrace is not really useful for small problems. -> mctrace(fun,ncol(A),tol=0.05) -[1] 0.6590496 -attr(,"sd") -[1] 0.03281585 -attr(,"iterations") -[1] 11102 -> # try a larger problem (3000x3000): -> f1 <- factor(sample(1500,3000,replace=TRUE)) -> f2 <- factor(sample(1500,3000,replace=TRUE)) -> fl <- list(f1,f2) -> mctrace(fl,tol=-5) -[1] 503.8349 -attr(,"sd") -[1] 4.485407 -attr(,"iterations") -[1] 28 -> # exact: -> length(f1) - nlevels(f1) - nlevels(f2) + nlevels(compfactor(fl)) -[1] 503 -> -> -> -> -> cleanEx() -> nameEx("nlexpect") -> ### * nlexpect -> -> flush(stderr()); flush(stdout()) -> -> ### Name: nlexpect -> ### Title: Compute expectation of a function of the coefficients. -> ### Aliases: nlexpect -> -> ### ** Examples -> -> -> N <- 100 -> x1 <- rnorm(N) -> # make some correlation -> x2 <- 0.1*rnorm(N) + 0.1*x1 -> y <- 0.1*x1 + x2 + rnorm(N) -> summary(est <- felm(y ~ x1 + x2)) - -Call: - felm(formula = y ~ x1 + x2) - -Residuals: - Min 1Q Median 3Q Max --2.94359 -0.43645 0.00202 0.63692 2.63941 - -Coefficients: - Estimate Std. 
Error t value Pr(>|t|) -(Intercept) 0.02535 0.10519 0.241 0.810 -x1 0.17458 0.15997 1.091 0.278 -x2 0.46533 1.09479 0.425 0.672 - -Residual standard error: 1.043 on 97 degrees of freedom -Multiple R-squared(full model): 0.03737 Adjusted R-squared: 0.01752 -Multiple R-squared(proj model): 0.03737 Adjusted R-squared: 0.01752 -F-statistic(full model):1.883 on 2 and 97 DF, p-value: 0.1577 -F-statistic(proj model): 1.883 on 2 and 97 DF, p-value: 0.1577 - - -> pt1 <- coef(est)['x1'] -> pt2 <- coef(est)['x2'] -> # expected values of coefficients, should match the summary -> # and variance, i.e. square of standard errors in the summary -> nlexpect(est, quote(c(x1=x1,x2=x2,var=c((x1-pt1)^2,(x2-pt2)^2)))) - x1 x2 var.x1 var.x2 -0.17450420 0.46517350 0.02611881 1.22460757 -> ## No test: -> # the covariance matrix: -> nlexpect(est, tcrossprod(as.matrix(c(x1-pt1,x2-pt2)))) - x1 x2 -x1 0.02609831 -0.122408 -x2 -0.12240804 1.224730 -> ## End(No test) -> #Wald test of single variable -> waldtest(est, ~x1)['p.F'] - p.F -0.2778482 -> # the same with nlexpect, i.e. probability for observing abs(x1)>abs(pt1) conditional -> # on E(x1) = 0. -> nlexpect(est, (x1-pt1)^2 > pt1^2, tol=1e-7, vectorize=TRUE) - x1 -0.2778482 -> # which is the same as -> 2*nlexpect(est, x1*sign(pt1) < 0) - x1 -0.2779329 -> -> # Here's a multivalued, vectorized example -> nlexpect(est, rbind(a=x1*x2 < pt1, b=x1*x2 > 0), vectorize=TRUE, method='divonne') - x1 -a 0.8587288 -b 0.5324731 -> ## No test: -> -> # Non-linear test: -> -> # A simple one, what's the probability that product x1*x2 is between 0 and |E(x1)|? 
-> nlexpect(est, x1*x2 > 0 & x1*x2 < abs(pt1), vectorize=TRUE, method='divonne') - x1 -0.3914772 -> # Then a more complicated one with the expected value of a polynomal in the coefficients -> f <- function(x) c(poly=x[['x1']]*(6*x[['x1']]-x[['x2']]^2)) -> # This is the linearized test: -> waldtest(est, f)['p.F'] - p.F -0.7432511 -> # In general, for a function f, the non-linear Wald test is something like -> # the following: -> # expected value of function -> Ef <- nlexpect(est, f, coefs=c('x1','x2')) -> # point value of function -> Pf <- f(c(pt1,pt2)) -> # similar to a Wald test, but non-linear: -> nlexpect(est, function(x) (f(x)-Ef)^2 > Pf^2, c('x1','x2'), vectorize=TRUE) - poly -0.6851808 -> # one-sided -> nlexpect(est, function(x) f(x)-Ef > abs(Pf), c('x1','x2'), vectorize=TRUE) - poly -0.267744 -> # other sided -> nlexpect(est, function(x) f(x)-Ef < -abs(Pf), c('x1','x2'), vectorize=TRUE) - poly -0.4193062 -> ## End(No test) -> -> -> -> -> cleanEx() -> nameEx("varvars") -> ### * varvars -> -> flush(stderr()); flush(stdout()) -> -> ### Name: varvars -> ### Title: Compute the variance of the fixed effect variance estimate -> ### Aliases: varvars -> -> ### ** Examples -> -> -> x <- rnorm(500) -> x2 <- rnorm(length(x)) -> -> ## create individual and firm -> id <- factor(sample(40,length(x),replace=TRUE)) -> firm <- factor(sample(30,length(x),replace=TRUE,prob=c(2,rep(1,29)))) -> foo <- factor(sample(20,length(x),replace=TRUE)) -> ## effects -> id.eff <- rnorm(nlevels(id)) -> firm.eff <- rnorm(nlevels(firm)) -> foo.eff <- rnorm(nlevels(foo)) -> ## left hand side -> id.m <- id.eff[id] -> firm.m <- 2*firm.eff[firm] -> foo.m <- 3*foo.eff[foo] -> y <- x + 0.25*x2 + id.m + firm.m + foo.m + rnorm(length(x)) -> -> # make a data frame -> fr <- data.frame(y,x,x2,id,firm,foo) -> ## estimate and print result -> est <- felm(y ~ x+x2|id+firm+foo, data=fr, keepX=TRUE) -> alpha <- getfe(est) -> # estimate the covariance matrix of the fixed effects -> fevcov(est, alpha) - id firm 
foo -id 0.93372348 0.01224916 -0.11164451 -firm 0.01224916 4.95014733 0.04605155 -foo -0.11164451 0.04605155 6.48715940 -attr(,"bias") - id firm foo -id 0.082334457 -0.004334716 -0.002472491 -firm -0.004334716 0.060817078 -0.002995544 -foo -0.002472491 -0.002995544 0.044123684 -> # estimate variances of the diagonal -> varvars(est, alpha) -[1] 0.008598521 0.043820907 0.057656021 -> -> -> -> -> cleanEx() -> nameEx("waldtest") -> ### * waldtest -> -> flush(stderr()); flush(stdout()) -> -> ### Name: waldtest -> ### Title: Compute Wald test for joint restrictions on coefficients -> ### Aliases: waldtest -> -> ### ** Examples -> -> -> x <- rnorm(10000) -> x2 <- rnorm(length(x)) -> y <- x - 0.2*x2 + rnorm(length(x)) -> #Also works for lm -> summary(est <- lm(y ~ x + x2 )) - -Call: -lm(formula = y ~ x + x2) - -Residuals: - Min 1Q Median 3Q Max --3.6477 -0.6843 0.0158 0.6744 3.6442 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.007554 0.010078 0.75 0.454 -x 0.999265 0.009956 100.37 <2e-16 *** -x2 -0.199066 0.010172 -19.57 <2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 1.008 on 9997 degrees of freedom -Multiple R-squared: 0.5108, Adjusted R-squared: 0.5107 -F-statistic: 5219 on 2 and 9997 DF, p-value: < 2.2e-16 - -> # We do not reject the true values -> waldtest(est, ~ x-1|x2+0.2|`(Intercept)`) - p chi2 df1 p.F F df2 - 0.9019639 0.5757362 3.0000000 0.9019611 0.1919121 9997.0000000 -attr(,"formula") -~x - 1 | x2 + 0.2 | `(Intercept)` - -> # The Delta-method coincides when the function is linear: -> waldtest(est, function(x) x - c(0, 1, -0.2)) - p chi2 df1 p.F F df2 - 0.9019639 0.5757362 3.0000000 0.9019611 0.1919121 9997.0000000 -attr(,"formula") -function (x) -x - c(0, 1, -0.2) - -> -> -> -> -> ### *