diff --git a/R/CSEGriskfigure.R b/R/CSEGriskfigure.R index e75a970..5b038ca 100644 --- a/R/CSEGriskfigure.R +++ b/R/CSEGriskfigure.R @@ -1,3 +1,52 @@ +#' Plot Extinction Risk Metrics +#' +#' @description +#' Generates a six-panel plot of extinction risk metrics used in Population Viability Analysis (PVA). This is a function used by one of the vignettes in the [MARSS-package()]. +#' +#' @param data A data matrix with 2 columns; time in first column and counts in second column. Note time is down rows, which is different than the base [MARSS-package()] functions. +#' @param te Length of forecast period (positive integer) +#' @param absolutethresh Is extinction threshold an absolute number? (T/F) +#' @param threshold Extinction threshold either as an absolute number, if `absolutethresh=TRUE`, or as a fraction of current population count, if `absolutethresh=FALSE`. +#' @param datalogged Are the data already logged? (T/F) +#' @param silent Suppress printed output? (T/F) +#' @param return.model Return state-space model as [marssMLE()] object? (T/F) +#' @param CI.method Confidence interval method: "hessian", "parametric", "innovations", or "none". See [MARSSparamCIs()]. +#' @param CI.sim Number of simulations for bootstrap confidence intervals (positive integer). +#' +#' @details +#' * Panel 1: Time-series plot of the data. +#' * Panel 2: CDF of extinction risk. +#' * Panel 3: PDF of time to reach threshold. +#' * Panel 4: Probability of reaching different thresholds during forecast period. +#' * Panel 5: Sample projections. +#' * Panel 6: TMU plot (uncertainty as a function of the forecast). +#' +#' @return +#' If `return.model=TRUE`, an object of class [marssMLE()]. +#' +#' @references +#' Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. NOAA Fisheries, Northwest Fisheries Science +#' Center, 2725 Montlake Blvd E., Seattle, WA 98112 Type `RShowDoc("UserGuide",package="MARSS")` to open a copy. 
+#' +#' (theory behind the figure) Holmes, E. E., J. L. Sabo, S. V. Viscido, and W. F. Fagan. (2007) A statistical approach to quasi-extinction forecasting. Ecology Letters 10:1182-1198. +#' +#' (CDF and PDF calculations) Dennis, B., P. L. Munholland, and J. M. Scott. (1991) Estimation of growth and extinction parameters for endangered species. Ecological Monographs 61:115-143. +#' +#' (TMU figure) Ellner, S. P. and E. E. Holmes. (2008) Resolving the debate on when extinction risk is predictable. Ecology Letters 11:E1-E5. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA, and Steve Ellner, Cornell Univ. +#' +#' @seealso [MARSSboot()], [marssMLE()], [CSEGtmufigure()] +#' +#' @examples +#' d <- harborSeal[, 1:2] +#' kem <- CSEGriskfigure(d, datalogged = TRUE) +#' +#' @name CSEGriskfigure +#' @aliases CSEGriskfigure +#' @keywords experimental hplot +#' @export CSEGriskfigure <- function(data, te = 100, absolutethresh = FALSE, threshold = 0.1, datalogged = FALSE, silent = FALSE, return.model = FALSE, CI.method = "hessian", CI.sim = 1000) { if (!(CI.method %in% c("hessian", "parametric", "innovations", "none"))) { stop("Stopped in CSEGriskfigure(): Allowed values of 'CI.method' are \"none\", \"hessian\", \"parametric\", and \"innovations\".\n", call. = FALSE) diff --git a/R/CSEGtmufigure.R b/R/CSEGtmufigure.R index 789c305..8816eba 100644 --- a/R/CSEGtmufigure.R +++ b/R/CSEGtmufigure.R @@ -1,3 +1,56 @@ +#' Plot Forecast Uncertainty +#' +#' @description +#' Plot the uncertainty in the probability of hitting a percent threshold +#' (quasi-extinction) for a single random walk trajectory. This is the +#' quasi-extinction probability used in Population Viability Analysis. The +#' uncertainty is shown as a function of the forecast, where the forecast is +#' defined in terms of the forecast length (number of time steps) and +#' forecasted decline (percentage). This is a function used by one of the +#' vignettes in the [MARSS-package]. 
+#' +#' @param N Time steps between the first and last population data point (positive integer) +#' @param u Per time-step decline (-0.1 means a 10\% decline per time step; 1 means a doubling per time step.) +#' @param s2p Process variance (Q). (a positive number) +#' @param make.legend Add a legend to the plot? (T/F) +#' +#' @details +#' This figure shows the region of high uncertainty in dark grey. In this +#' region, the minimum 95 percent confidence intervals on the probability of +#' quasi-extinction span 80 percent of the 0 to 1 probability. Green hashing +#' indicates where the 95 percent upper bound does not exceed 5\% probability +#' of quasi-extinction. The red hashing indicates where the 95 percent lower +#' bound is above 95\% probability of quasi-extinction. The light grey lies +#' between these two certain/uncertain extremes. The extinction calculation is +#' based on Dennis et al. (1991). The minimum theoretical confidence interval +#' is based on Fieberg and Ellner (2000). This figure was developed in Ellner +#' and Holmes (2008). +#' +#' Examples using this figure are shown in the +#' [User Guide](https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf) +#' in the PVA chapter. +#' +#' @references +#' Dennis, B., P. L. Munholland, and J. M. Scott. (1991) Estimation of growth +#' and extinction parameters for endangered species. Ecological Monographs +#' 61:115-143. +#' +#' Fieberg, J. and Ellner, S.P. (2000) When is it meaningful to estimate an +#' extinction probability? Ecology, 81, 2040-2047. +#' +#' Ellner, S. P. and E. E. Holmes. (2008) Resolving the debate on when +#' extinction risk is predictable. Ecology Letters 11:E1-E5. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA, and Steve Ellner, Cornell Univ. 
+#' +#' @seealso [CSEGriskfigure()] +#' +#' @examples +#' CSEGtmufigure(N = 20, u = -0.1, s2p = 0.01) +#' +#' @keywords experimental hplot +#' @export # This is a figure of the theoretical minimum uncertainty regions # sensu Ellner and Holmes figure 1 # Code written by Steven Ellner and Eli Holmes diff --git a/R/MARSS.R b/R/MARSS.R index cc6175d..ecde528 100644 --- a/R/MARSS.R +++ b/R/MARSS.R @@ -1,3 +1,340 @@ +#' Fit a MARSS Model via Maximum-Likelihood Estimation +#' +#' @description +#' This is the main function for fitting multivariate autoregressive +#' state-space (MARSS) models with linear constraints. Scroll down to the +#' bottom to see some short examples. To open a guide to show you how to get +#' started quickly, type `RShowDoc("Quick_Start",package="MARSS")`. To open +#' the MARSS User Guide from the command line, type +#' `RShowDoc("UserGuide",package="MARSS")`. To get an overview of the package +#' and all its main functions and how to get output (parameter estimates, +#' fitted values, residuals, Kalman filter or smoother output, or plots), go +#' to [MARSS-package]. If `MARSS()` is throwing errors or warnings that you +#' don't understand, try the Troubleshooting section of the user guide or type +#' [MARSSinfo()] at the command line. 
+#' +#' The default MARSS model form is "marxss", which is Multivariate +#' Auto-Regressive(1) eXogenous inputs State-Space model: +#' \deqn{\mathbf{x}_{t} = \mathbf{B}_t \mathbf{x}_{t-1} + \mathbf{u}_t + \mathbf{C}_t \mathbf{c}_t + \mathbf{G}_t \mathbf{w}_t, \textrm{ where } \mathbf{W}_t \sim \textrm{MVN}(0,\mathbf{Q}_t)}{x(t) = B(t) x(t-1) + u(t) + C(t) c(t) + G(t) w(t), where W(t) ~ MVN(0,Q(t))} +#' \deqn{\mathbf{y}_t = \mathbf{Z}_t \mathbf{x}_t + \mathbf{a}_t + \mathbf{D}_t \mathbf{d}_t + \mathbf{H}_t \mathbf{v}_t, \textrm{ where } \mathbf{V}_t \sim \textrm{MVN}(0,\mathbf{R}_t)}{y(t) = Z(t) x(t) + a(t) + D(t) d(t) + H(t) v(t), where V(t) ~ MVN(0,R(t))} +#' \deqn{\mathbf{X}_1 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) \textrm{ or } \mathbf{X}_0 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) }{X(1) ~ MVN(x0, V0) or X(0) ~ MVN(x0, V0) } +#' The parameters are everything except \eqn{\mathbf{x}}{x}, \eqn{\mathbf{y}}{y}, +#' \eqn{\mathbf{v}}{v}, \eqn{\mathbf{w}}{w}, \eqn{\mathbf{c}}{c} and +#' \eqn{\mathbf{d}}{d}. \eqn{\mathbf{y}}{y} are data (missing values allowed). +#' \eqn{\mathbf{c}}{c} and \eqn{\mathbf{d}}{d} are inputs (no missing values +#' allowed). All parameters (except \eqn{\mathbf{x0}}{x0} and +#' \eqn{\mathbf{V0}}{V0}) can be time-varying but by default, all are +#' time-constant (and the MARSS equation is generally written without the +#' \eqn{t} subscripts on the parameter matrices). All parameters can be zero, +#' including the variance matrices. +#' +#' The parameter matrices can have fixed values and linear constraints. This +#' is an example of a 3x3 matrix with linear constraints. All matrix elements +#' can be written as a linear function of \eqn{a}, \eqn{b}, and \eqn{c}: +#' \deqn{\left[\begin{array}{c c c} a+2b & 1 & a\\ 1+3a+b & 0 & b \\ 0 & -2 & c\end{array}\right]}{[a+2b 1 a \n 1+3a+b 0 b \n 0 -2 c ]} +#' +#' Values such as \eqn{a b} or \eqn{a^2} or \eqn{log(a)} are not linear constraints. 
+#' +#' @param y An n x T matrix of n time series over T time steps. Only y is +#' required for the function. A ts object (univariate or multivariate) can +#' be used and this will be converted to a matrix with time in the columns. +#' @param inits A list with the same form as the list outputted by `coef(fit)` +#' that specifies initial values for the parameters. See also [MARSS.marxss()]. +#' @param model Model specification using a list of parameter matrix text +#' shortcuts or matrices. See Details and [MARSS.marxss()] for the default +#' form. Or better yet open the Quick Start Guide +#' `RShowDoc("Quick_Start",package="MARSS")`. +#' @param miss.value Deprecated. Denote missing values by NAs in your data. +#' @param method Estimation method. MARSS provides an EM algorithm +#' (`method="kem"`) (see [MARSSkem()]) and the BFGS algorithm +#' (`method="BFGS"`) (see [MARSSoptim()]). +#' @param form The equation form used in the `MARSS()` call. The default is +#' "marxss". See [MARSS.marxss()] or [MARSS.dfa()]. +#' @param fit TRUE/FALSE Whether to fit the model to the data. If FALSE, a +#' [marssMLE] object with only the model is returned. +#' @param silent Setting to TRUE(1) suppresses printing of full error messages, +#' warnings, progress bars and convergence information. Setting to FALSE(0) +#' produces error output. Setting silent=2 will produce more verbose error +#' messages and progress information. +#' @param fun.kf What Kalman filter function to use. MARSS has two: +#' [MARSSkfas()] which is based on the Kalman filter in the +#' [KFAS](https://cran.r-project.org/package=KFAS) package based on Koopman +#' and Durbin and [MARSSkfss()] which is a native R implementation of the +#' Kalman filter and smoother in Shumway and Stoffer. The KFAS filter is +#' much faster. [MARSSkfas()] modifies the input and output in order to +#' output the lag-one covariance smoother needed for the EM algorithm (per +#' page 321 in Shumway and Stoffer (2000)). 
+#' @param control Estimation options for the maximization algorithm. The +#' typically used control options for method="kem" are below but see +#' [marssMLE] for the full list of control options. Note many of these are +#' not allowed if method="BFGS"; see [MARSSoptim()] for the allowed control +#' options for this method. +#' +#' * `minit` The minimum number of iterations to do in the maximization +#' routine (if needed by method). If `method="kem"`, this is an easy way +#' to up the iterations and see how your estimates are converging. +#' (positive integer) +#' * `maxit` Maximum number of iterations to be used in the maximization +#' routine (if needed by method) (positive integer). +#' * `min.iter.conv.test` Minimum iterations to run before testing +#' convergence via the slope of the log parameter versus log iterations. +#' * `conv.test.deltaT=9` Number of iterations to use for the testing +#' convergence via the slope of the log parameter versus log iterations. +#' * `conv.test.slope.tol` The slope of the log parameter versus log +#' iteration to use as the cut-off for convergence. The default is 0.5 +#' which is a bit high. For final analyses, this should be set lower. If +#' you want to only use abstol as your convergence test, then set to +#' something very large, for example `conv.test.slope.tol=1000`. Type +#' `MARSSinfo(11)` to see some comments on when you might want to do this. +#' * `abstol` The logLik.(iter-1)-logLik.(iter) convergence tolerance for +#' the maximization routine. To meet convergence both the abstol and slope +#' tests must be passed. +#' * `allow.degen` Whether to try setting \eqn{\mathbf{Q}}{Q} or +#' \eqn{\mathbf{R}}{R} elements to zero if they appear to be going to zero. +#' * `trace` An integer specifying the level of information recorded and +#' error-checking run during the algorithms. `trace=0` specifies basic +#' error-checking and brief error-messages; `trace>0` will print full +#' error messages. 
In addition if trace>0, the Kalman filter output will +#' be added to the outputted [marssMLE] object. Additional information +#' recorded depends on the method of maximization. For the EM algorithm, a +#' record of each parameter estimate for each EM iteration will be added. +#' See [optim()] for trace output details for the BFGS method. +#' `trace=-1` will turn off most internal error-checking and most error +#' messages. The internal error checks are time expensive so this can speed +#' up model fitting. This is particularly useful for bootstrapping and +#' simulation studies. It is also useful if you get an error saying that +#' `MARSS()` stops in [MARSSkfss()] due to a `chol()` call. `MARSSkfss()` +#' uses matrix inversions and for some models these are unstable (high +#' condition value). `MARSSkfss()` is used for error-checks and does not +#' need to be called normally. +#' * `safe` Setting `safe=TRUE` runs the Kalman smoother after each +#' parameter update rather than running the smoother only once after +#' updating all parameters. The latter is faster but is not a strictly +#' correct EM algorithm. In most cases, `safe=FALSE` (default) will not +#' change the fits. If this setting does cause problems, you will know +#' because you will see an error regarding the log-likelihood dropping and +#' it will direct you to set `safe=TRUE`. +#' @param ... Optional arguments passed to function specified by form. +#' +#' @details +#' The `model` argument specifies the structure of your model. There is a +#' one-to-one correspondence between how you would write your model in matrix +#' form on the whiteboard and how you specify the model for `MARSS()`. Many +#' different types of multivariate time-series models can be converted to the +#' MARSS form. See the +#' [User Guide](https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf) +#' and [Quick Start Guide](https://cran.r-project.org/package=MARSS/vignettes/Quick_Start.html) +#' for examples. 
+#' +#' The MARSS package has two forms for standard users: marxss and dfa. +#' +#' * [MARSS.marxss()] This is the default form. This is a MARSS model with +#' (optional) inputs \eqn{\mathbf{c}_t}{c(t)} or \eqn{\mathbf{d}_t}{d(t)}. +#' Most users will want this help page. +#' * [MARSS.dfa()] This is a model form to allow easier specification of +#' models for Dynamic Factor Analysis. The \eqn{\mathbf{Z}}{Z} parameter matrix +#' has a specific form and the \eqn{\mathbf{Q}}{Q} is set at i.i.d. +#' (diagonal) with variance of 1. +#' +#' Those looking to modify or understand the base code should look at +#' [MARSS.marss()] and `MARSS.vectorized()`. These describe the forms used by +#' the base functions. The EM algorithm uses the MARSS model written in +#' vectorized form. This form is what allows linear constraints. +#' +#' The likelihood surface for MARSS models can be multimodal or have strong +#' ridges. It is recommended that for final analyses the estimates are checked +#' by using a Monte Carlo initial conditions search; see the chapter on initial +#' conditions searches in the User Guide. This requires more computation time, +#' but reduces the chance of the algorithm terminating at a local maximum and +#' not reaching the true MLEs. Also it is wise to check the EM results against +#' the BFGS results (if possible) if there are strong ridges in the likelihood. +#' Such ridges seem to slow down the EM algorithm considerably and can cause +#' the algorithm to report convergence far from the maximum-likelihood values. +#' EM steps up the likelihood and the convergence test is based on the rate of +#' change of the log-likelihood in each step. Once on a strong ridge, the +#' steps can slow dramatically. You can force the algorithm to keep working by +#' setting `minit`. BFGS seems less hindered by the ridges but can be +#' prodigiously slow for some multivariate problems. 
BFGS tends to work better +#' if you give it good initial conditions (see Examples below for how to do +#' this). +#' +#' If you are working with models with time-varying parameters, it is important +#' to notice the time-index for the parameters in the process equation (the +#' \eqn{\mathbf{x}}{x} equation). In some formulations (e.g. in +#' [KFAS::KFAS]), the process equation is +#' \eqn{\mathbf{x}_t=\mathbf{B}_{t-1}\mathbf{x}_{t-1}+\mathbf{w}_{t-1}}{x(t)=B(t-1)x(t-1)+w(t-1)} +#' so \eqn{\mathbf{B}_{t-1}}{B(t-1)} goes with \eqn{\mathbf{x}_t}{x(t)} not +#' \eqn{\mathbf{B}_t}{B(t)}. Thus one needs to be careful to line up the time +#' indices when passing in time-varying parameters to `MARSS()`. See the User +#' Guide for examples. +#' +#' @return +#' An object of class [marssMLE]. The structure of this object is discussed +#' below, but if you want to know how to get specific output (like residuals, +#' coefficients, smoothed states, confidence intervals, etc.), see +#' [print.marssMLE()], [tidy.marssMLE()], [MARSSresiduals()] and +#' [plot.marssMLE()]. +#' +#' The outputted [marssMLE] object has the following components: +#' +#' * `model` MARSS model specification. It is a [marssMODEL] object in the +#' form specified by the user in the `MARSS()` call. This is used by print +#' functions so that the user sees the expected form. +#' * `marss` The [marssMODEL] object in marss form. This form is needed for +#' all the internal algorithms, thus is a required part of a [marssMLE] +#' object. +#' * `call` All the information passed in via the `MARSS()` call. +#' * `start` List specifying the initial values that were used for each +#' parameter matrix. +#' * `control` A list of estimation options, as specified by argument +#' `control`. +#' * `method` Estimation method. +#' +#' If `fit=TRUE`, the following are also added to the [marssMLE] object. +#' If `fit=FALSE`, a [marssMLE] object ready for fitting via the specified +#' `method` is returned. 
+#' +#' * `par` A list of estimated parameter values in marss form. Use +#' [print.marssMLE()], [tidy.marssMLE()] or [coef.marssMLE()] for outputting +#' the model estimates in the `MARSS()` call (e.g. the default "marxss" form). +#' * `states` The expected value of \eqn{\mathbf{X}}{X} conditioned on all +#' the data, i.e. smoothed states. +#' * `states.se` The standard errors of the expected value of +#' \eqn{\mathbf{X}}{X}. +#' * `ytT` The expected value of \eqn{\mathbf{Y}}{Y} conditioned on all the +#' data. Note this is just \eqn{y} for those \eqn{y} that are not missing. +#' * `ytT.se` The standard errors of the expected value of +#' \eqn{\mathbf{Y}}{Y}. Note this is 0 for any non-missing \eqn{y}. +#' * `numIter` Number of iterations required for convergence. +#' * `convergence` Convergence status. 0 means converged successfully, 3 +#' means all parameters were fixed (so model did not need to be fit) and -1 +#' means call was made with `fit=FALSE` and parameters were not fixed (thus +#' no `$par` element and Kalman filter/smoother cannot be run). Anything +#' else is a warning or error. 2 means the [marssMLE] object has an error; +#' the object is returned so you can debug it. The other numbers are errors +#' during fitting. The error code depends on the fitting method. See +#' [MARSSkem()] and [MARSSoptim()]. +#' * `logLik` Log-likelihood. +#' * `AIC` Akaike's Information Criterion. +#' * `AICc` Sample size corrected AIC. +#' +#' If `control$trace` is set to 1 or greater, the following are also added to +#' the [marssMLE] object. +#' +#' * `kf` A list containing Kalman filter/smoother output from [MARSSkf()]. +#' This is not normally added to a [marssMLE] object since it is verbose, +#' but can be added using [MARSSkf()]. +#' * `Ey` A list containing output from [MARSShatyt()]. This isn't normally +#' added to a [marssMLE] object since it is verbose, but can be computed +#' using [MARSShatyt()]. +#' +#' @references +#' The MARSS User Guide: Holmes, E. E., E. 
J. Ward, and M. D. Scheuerell +#' (2012) Analysis of multivariate time-series using the MARSS package. NOAA +#' Fisheries, Northwest Fisheries Science Center, 2725 Montlake Blvd E., +#' Seattle, WA 98112. Type `RShowDoc("UserGuide",package="MARSS")` to open a +#' copy. +#' +#' Holmes, E. E. (2012). Derivation of the EM algorithm for constrained and +#' unconstrained multivariate autoregressive state-space (MARSS) models. +#' Technical Report. arXiv:1302.3919 [stat.ME] +#' +#' Holmes, E. E., E. J. Ward and K. Wills. (2012) MARSS: Multivariate +#' autoregressive state-space models for analyzing time-series data. R Journal +#' 4: 11-19. +#' +#' @author +#' Eli Holmes, Eric Ward and Kellie Wills, NOAA, Seattle, USA. +#' +#' @seealso +#' [marssMLE], [MARSSkem()], [MARSSoptim()], [MARSSkf()], [MARSS-package], +#' [print.marssMLE()], [plot.marssMLE()], [print.marssMODEL()], +#' [MARSS.marxss()], [MARSS.dfa()], [fitted.marssMLE()], +#' [residuals.marssMLE()], [MARSSresiduals()], [predict.marssMLE()], +#' [tsSmooth.marssMLE()], [tidy.marssMLE()], [coef.marssMLE()] +#' +#' @examples +#' dat <- t(harborSealWA) +#' dat <- dat[2:4, ] # remove the year row +#' # fit a model with 1 hidden state and 3 observation time series +#' kemfit <- MARSS(dat, model = list( +#' Z = matrix(1, 3, 1), +#' R = "diagonal and equal" +#' )) +#' kemfit$model # This gives a description of the model +#' print(kemfit$model) # same as kemfit$model +#' summary(kemfit$model) # This shows the model structure +#' +#' # add CIs to a marssMLE object +#' # default uses an estimated Hessian matrix +#' kem.with.hess.CIs <- MARSSparamCIs(kemfit) +#' kem.with.hess.CIs +#' +#' # fit a model with 3 hidden states (default) +#' kemfit <- MARSS(dat, silent = TRUE) # suppress printing +#' kemfit +#' +#' # Fit the above model with BFGS using a short EM fit as initial conditions +#' kemfit <- MARSS(dat, control=list(minit=5, maxit=5)) +#' bffit <- MARSS(dat, method="BFGS", inits=kemfit) +#' +#' # fit a model with 3 
correlated hidden states +#' # with one variance and one covariance +#' # maxit set low to speed up example, but more iters are needed for convergence +#' kemfit <- MARSS(dat, model = list(Q = "equalvarcov"), control = list(maxit = 50)) +#' # use Q="unconstrained" to allow different variances and covariances +#' +#' # fit a model with 3 independent hidden states +#' # where each observation time series is independent +#' # the hidden trajectories 2-3 share their U parameter +#' kemfit <- MARSS(dat, model = list(U = matrix(c("N", "S", "S"), 3, 1))) +#' +#' # same model, but with fixed independent observation errors +#' # and the 3rd x process is forced to have U=0 +#' # Notice how a list matrix is used to combine fixed and estimated elements +#' # all parameters can be specified in this way using list matrices +#' kemfit <- MARSS(dat, model = list(U = matrix(list("N", "N", 0), 3, 1), R = diag(0.01, 3))) +#' +#' # fit a model with 2 hidden states (north and south) +#' # where observation time series 1-2 are north and 3 is south +#' # Make the hidden state process independent with same process var +#' # Make the observation errors different but independent +#' # Make the growth parameters (U) the same +#' # Create a Z matrix as a design matrix that assigns the "N" state to the first 2 rows of dat +#' # and the "S" state to the 3rd row of dat +#' Z <- matrix(c(1, 1, 0, 0, 0, 1), 3, 2) +#' # You can use factor as a shortcut for making the above design matrix for Z +#' # Z <- factor(c("N","N","S")) +#' # name the state vectors +#' colnames(Z) <- c("N", "S") +#' kemfit <- MARSS(dat, model = list( +#' Z = Z, +#' Q = "diagonal and equal", R = "diagonal and unequal", U = "equal" +#' )) +#' +#' # print the model followed by the marssMLE object +#' kemfit$model +#' +#' \dontrun{ +#' # simulate some new data from our fitted model +#' sim.data <- MARSSsimulate(kemfit, nsim = 10, tSteps = 10) +#' +#' # Compute bootstrap AIC for the model; this takes a long, long time +#' 
kemfit.with.AICb <- MARSSaic(kemfit, output = "AICbp") +#' kemfit.with.AICb +#' } +#' +#' \dontrun{ +#' # Many more short examples can be found in the +#' # Quick Examples chapter in the User Guide +#' RShowDoc("UserGuide", package = "MARSS") +#' +#' # You can find the R scripts from the chapters by +#' # going to the index page +#' RShowDoc("index", package = "MARSS") +#' } +#' +#' @export MARSS <- function(y, model = NULL, inits = NULL, diff --git a/R/MARSSFisherI.r b/R/MARSSFisherI.r index 8326bca..ada66ca 100644 --- a/R/MARSSFisherI.r +++ b/R/MARSSFisherI.r @@ -11,6 +11,128 @@ # Ho, Shumway and Ombao (2006) The state-space approach to modeling dynamic processes in Models for Intensive Longitudinal Data # page 157 suggest that this derivative is hard to compute. ####################################################################################################### + +#' Observed Fisher Information Matrix at the MLE +#' +#' @description +#' Returns the observed Fisher Information matrix for a [marssMLE] object (a +#' fitted MARSS model) via either the analytical algorithm of Harvey (1989) or +#' a numerical estimate. +#' +#' The observed Fisher Information is the negative of the second-order partial +#' derivatives of the log-likelihood function evaluated at the MLE. The +#' derivatives being with respect to the parameters. The Hessian matrix is the +#' second-order partial derivatives of a scalar-valued function. Thus the +#' observed Fisher Information matrix is the Hessian of the negative +#' log-likelihood function evaluated at the MLE (or equivalently the negative +#' of the Hessian of the log-likelihood function). The inverse of the observed +#' Fisher Information matrix is an estimate of the asymptotic +#' variance-covariance matrix for the estimated parameters. Use +#' [MARSShessian()] (which calls `MARSSFisherI()`) to return the parameter +#' variance-covariance matrix computed from the observed Fisher Information +#' matrix. 
+#' +#' Note for the numerically estimated Hessian, we pass in the negative +#' log-likelihood function to a minimization function. As a result, the +#' numerical functions return the Hessian of the negative log-likelihood +#' function (which is the observed Fisher Information matrix). +#' +#' @param MLEobj An object of class [marssMLE]. This object must have a `$par` +#' element containing MLE parameter estimates from e.g. [MARSSkem()]. +#' @param method The method to use for computing the observed Fisher +#' Information matrix. Options are `"Harvey1989"` to use the Harvey (1989) +#' recursion, which is an analytical solution, `"fdHess"` or `"optim"` +#' which are two numerical methods. Although `"optim"` can be passed to the +#' function, `"fdHess"` is used for all numerical estimates used in the +#' MARSS package. +#' +#' @details +#' Method `'fdHess'` uses [nlme::fdHess()] to numerically estimate the +#' Hessian of the negative log-likelihood function at the MLEs. Method +#' `'optim'` uses [optim()] with `hessian=TRUE` and `list(maxit=0)` to ensure +#' that the Hessian is computed at the values in the `par` element of the MLE +#' object. The `par` element of the [marssMLE] object is the MLE. +#' +#' Method `'Harvey1989'` (the default) uses the recursion in Harvey (1989) to +#' compute the observed Fisher Information of a MARSS model analytically. See +#' Holmes (2016c) for a discussion of the Harvey (1989) algorithm and see +#' Holmes (2017) on how to implement the algorithm for MARSS models with +#' linear constraints (the type of MARSS models that the MARSS R package +#' addresses). +#' +#' There has been research on computing the observed Fisher Information matrix +#' from the derivatives used by EM algorithms (discussed in Holmes (2016a, +#' 2016b)), for example Louis (1982). Unfortunately, the EM algorithm used in +#' the MARSS package is for time series data and the temporal correlation must +#' be dealt with, e.g. Duan & Fulop (2011). 
Oakes (1999) has an approach that +#' only involves derivatives of +#' \eqn{\textrm{E}[LL(\Theta)|\mathbf{y},\Theta']}{E(LL(Theta)|data, Theta')} +#' but one of the derivatives will be the derivative of the +#' \eqn{\textrm{E}[\mathbf{X}|\mathbf{y},\Theta']}{E(X|data, Theta')} with +#' respect to \eqn{\Theta'}{Theta'}. It is not clear how to do that +#' derivative. Moon-Ho, Shumway and Ombao (2006) suggest (page 157) that this +#' derivative is hard to compute. +#' +#' @return +#' Returns the observed Fisher Information matrix. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSSharveyobsFI()], [MARSShessian.numerical], [MARSSparamCIs], +#' [marssMLE] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[2:4, ] +#' MLEobj <- MARSS(dat, model=list(Z=matrix(1,3,1), R="diagonal and equal")) +#' MARSSFisherI(MLEobj) +#' MARSSFisherI(MLEobj, method="fdHess") +#' +#' @references +#' Harvey, A. C. (1989) Section 3.4.5 (Information matrix) in Forecasting, +#' structural time series models and the Kalman filter. Cambridge University +#' Press, Cambridge, UK. +#' +#' See also J. E. Cavanaugh and R. H. Shumway (1996) On computing the +#' expected Fisher information matrix for state-space model parameters. +#' Statistics & Probability Letters 26: 347-355. This paper discusses the +#' Harvey (1989) recursion (and proposes an alternative). +#' +#' Holmes, E. E. 2016a. Notes on computing the Fisher Information matrix for +#' MARSS models. Part I Background. Technical Report. +#' https://doi.org/10.13140/RG.2.2.27306.11204/1 +#' [Notes](https://eeholmes.github.io/posts/2016-5-18-FI-recursion-1/) +#' +#' Holmes, E. E. 2016b. Notes on computing the Fisher Information matrix for +#' MARSS models. Part II Louis 1982. Technical Report. +#' https://doi.org/10.13140/RG.2.2.35694.72000 +#' [Notes](https://eeholmes.github.io/posts/2016-5-19-FI-recursion-2/) +#' +#' Holmes, E. E. 2016c. Notes on computing the Fisher Information matrix for +#' MARSS models. 
Part III Overview of Harvey 1989. +#' +#' +#' Holmes, E. E. 2017. Notes on computing the Fisher Information matrix for +#' MARSS models. Part IV Implementing the Recursion in Harvey 1989. +#' +#' +#' Duan, J. C. and A. Fulop. (2011) A stable estimator of the information +#' matrix under EM for dependent data. Statistics and Computing 21: 83-91. +#' +#' Louis, T. A. 1982. Finding the observed information matrix when using the +#' EM algorithm. Journal of the Royal Statistical Society. Series B +#' (Methodological). 44: 226-233. +#' +#' Oakes, D. 1999. Direct calculation of the information matrix via the EM +#' algorithm. Journal of the Royal Statistical Society. Series B +#' (Methodological). 61: 479-482. +#' +#' Moon-Ho, R. H., R. H. Shumway, and Ombao 2006. The state-space approach to +#' modeling dynamic processes. Chapter 7 in Models for Intensive Longitudinal +#' Data. Oxford University Press. +#' +#' @export MARSSFisherI <- function(MLEobj, method = c("Harvey1989", "fdHess", "optim")) { # optim is here for debugging. fdHess is the numerical method used for users method <- match.arg(method) diff --git a/R/MARSS_dfa.r b/R/MARSS_dfa.r index 39776a8..18c0db1 100644 --- a/R/MARSS_dfa.r +++ b/R/MARSS_dfa.r @@ -12,6 +12,131 @@ # predict.marssMLE, coef.marssMLE, MARSSinits.marssMLE # print_dfa, residuals_dfa, predict_dfa, coef_dfa, MARSSinits_dfa ################################################################################### + +#' Multivariate Dynamic Factor Analysis +#' +#' @description +#' The Dynamic Factor Analysis model in MARSS is specified via +#' `form="dfa"` in a [MARSS()] function call. 
This is a MARSS(1) model of +#' the form: +#' \deqn{\mathbf{x}_{t} = \mathbf{x}_{t-1} + \mathbf{w}_t, \textrm{ where } \mathbf{W}_t \sim \textrm{MVN}(0,\mathbf{I})}{x(t) = x(t-1) + w(t), where W(t) ~ MVN(0,I)} +#' \deqn{\mathbf{y}_t = \mathbf{Z}_t \mathbf{x}_t + \mathbf{D}_t \mathbf{d}_t + \mathbf{v}_t, \textrm{ where } \mathbf{V}_t \sim \textrm{MVN}(0,\mathbf{R}_t)}{y(t) = Z(t) x(t) + D(t) d(t) + v(t), where V(t) ~ MVN(0,R(t))} +#' \deqn{\mathbf{X}_1 \sim \textrm{MVN}(\mathbf{x0}, 5\mathbf{I})}{X(1) ~ MVN(x0, 5I) } +#' Note, by default \eqn{\mathbf{x}_1}{x(1)} is treated as a diffuse prior. +#' +#' Passing in `form="dfa"` to [MARSS()] invokes a helper function to create +#' that model and creates the \eqn{\mathbf{Z}}{Z} matrix for the user. +#' \eqn{\mathbf{Q}}{Q} is by definition identity, \eqn{\mathbf{x}_0}{x0} is +#' zero and \eqn{\mathbf{V_0}}{V0} is diagonal with large variance (5). +#' \eqn{\mathbf{u}}{U} is zero, \eqn{\mathbf{a}}{A} is zero, and covariates +#' only enter the \eqn{\mathbf{y}}{Y} equation. Because \eqn{\mathbf{u}}{U} +#' and \eqn{\mathbf{a}}{A} are 0, the data should have mean 0 (demeaned) +#' otherwise one is likely to be creating a structurally inadequate model +#' (i.e. the model implies that the data have mean = 0, yet data do not have +#' mean = 0). +#' +#' @section Usage: +#' ``` +#' MARSS(y, +#' inits = NULL, +#' model = NULL, +#' miss.value = as.numeric(NA), +#' method = "kem", +#' form = "dfa", +#' fit = TRUE, +#' silent = FALSE, +#' control = NULL, +#' fun.kf = "MARSSkfas", +#' demean = TRUE, +#' z.score = TRUE) +#' ``` +#' +#' @param MARSS.call A list of arguments from a [MARSS()] call with +#' `form="dfa"`. +#' +#' @details +#' Some arguments are common to all forms: "y" (data), "inits", "control", +#' "method", "form", "fit", "silent", "fun.kf". See [MARSS()] for information +#' on these arguments. +#' +#' In addition, `form="dfa"` has some special arguments that can be passed in: +#' +#' * `demean` Logical. 
Default is TRUE, which means the data will be demeaned. +#' * `z.score` Logical. Default is TRUE, which means the data will be +#' z-scored (demeaned and variance standardized to 1). +#' * `covariates` Covariates (\eqn{d}) for the \eqn{y} equation. No missing +#' values allowed and must be a matrix with the same number of time steps as +#' the data. An unconstrained \eqn{D} matrix will be estimated. +#' +#' The `model` argument of the [MARSS()] call is constrained in terms of what +#' parameters can be changed and how they can be changed. An additional +#' element, `m`, can be passed into the `model` argument that specifies the +#' number of hidden state variables. It is not necessary for the user to +#' specify `Z` as the helper function will create a `Z` appropriate for a DFA +#' model. +#' +#' The `model` argument is a list. The following details what list elements +#' can be passed in: +#' +#' * `B` "Identity". The standard (and default) DFA model has B="identity". +#' However it can be "identity", "diagonal and equal", "diagonal and +#' unequal" or a time-varying fixed or estimated diagonal matrix. +#' * `U` "Zero". Cannot be changed or passed in via model argument. +#' * `Q` "Identity". The standard (and default) DFA model has Q="identity". +#' However, it can be "identity", "diagonal and equal", "diagonal and +#' unequal" or a time-varying fixed or estimated diagonal matrix. +#' * `Z` Can be passed in as a (list) matrix if the user does not want a +#' default DFA `Z` matrix. There are many equivalent ways to construct a +#' DFA `Z` matrix. The default is Zuur et al.'s form (see User Guide). +#' * `A` Default="zero". Can be "unequal", "zero" or a matrix. +#' * `R` Default="diagonal and equal". Can be set to "identity", "zero", +#' "unconstrained", "diagonal and unequal", "diagonal and equal", +#' "equalvarcov", or a (list) matrix to specify general forms. +#' * `x0` Default="zero". Can be "unconstrained", "unequal", "zero", or a +#' (list) matrix. 
+#' * `V0` Default=diagonal matrix with 5 on the diagonal. Can be "identity", +#' "zero", or a matrix. +#' * `tinitx` Default=0. Can be 0 or 1. Tells MARSS whether x0 is at t=0 or +#' t=1. +#' * `m` Default=1. Can be 1 to n (the number of y time-series). Must be +#' integer. +#' +#' See the +#' [User Guide](https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf) +#' chapter on Dynamic Factor Analysis for examples of using `form="dfa"`. +#' +#' @return +#' An object of class [marssMLE]. See [print.marssMLE()] for a discussion of +#' the various output available for [marssMLE] objects (coefficients, +#' residuals, Kalman filter and smoother output, imputed values for missing +#' data, etc.). See [MARSSsimulate()] for simulating from [marssMLE] objects, +#' [MARSSboot()] for bootstrapping, [MARSSaic()] for calculation of various +#' AIC related model selection metrics, and [MARSSparamCIs()] for calculation +#' of confidence intervals and bias. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSS()], [MARSS.marxss()] +#' +#' @references +#' The MARSS User Guide: Holmes, E. E., E. J. Ward, and M. D. Scheuerell +#' (2012) Analysis of multivariate time-series using the MARSS package. NOAA +#' Fisheries, Northwest Fisheries Science Center, 2725 Montlake Blvd E., +#' Seattle, WA 98112. Type `RShowDoc("UserGuide",package="MARSS")` to open a +#' copy. 
+#' +#' @examples +#' \dontrun{ +#' dat <- t(harborSealWA[,-1]) +#' # DFA with 3 states; used BFGS because it fits much faster for this model +#' fit <- MARSS(dat, model = list(m=3), form="dfa", method="BFGS") +#' +#' # See the Dynamic Factor Analysis chapter in the User Guide +#' RShowDoc("UserGuide", package = "MARSS") +#' } +#' +#' @export MARSS.dfa <- function(MARSS.call) { # MARSS(data, model=list(), covariates=NULL, z.score=TRUE, demean=TRUE, control=list()) # model.defaults =list(A="zero", R="diagonal and equal", D="zero", x0="zero", V0=diag(5,1), tinitx=0, diffuse=FALSE, m=1) diff --git a/R/MARSS_marss.R b/R/MARSS_marss.R index 8a5dfa4..eaec48a 100644 --- a/R/MARSS_marss.R +++ b/R/MARSS_marss.R @@ -13,6 +13,76 @@ # is.marssMODEL_marss ################################################################################### + +#' Multivariate AR-1 State-space Model +#' +#' @description +#' The form of MARSS models for users is "marxss", the MARSS models with +#' inputs. See [MARSS.marxss()]. In the internal algorithms (e.g. +#' [MARSSkem()]), the "marss" form is used and the +#' \eqn{\mathbf{D}\mathbf{d}_t}{Dd(t)} are incorporated into the +#' \eqn{\mathbf{a}_t}{a(t)} matrix and \eqn{\mathbf{C}\mathbf{c}_t}{Cc(t)} +#' are incorporated into the \eqn{\mathbf{u}_t}{u(t)}. The +#' \eqn{\mathbf{a}}{a} and \eqn{\mathbf{u}}{u} matrices then become +#' time-varying if the model includes \eqn{\mathbf{d}_t}{d(t)} and +#' \eqn{\mathbf{c}_t}{c(t)}. 
+#' +#' This is a MARSS(1) model of the marss form: +#' \deqn{\mathbf{x}_{t} = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u}_t + \mathbf{G} \mathbf{w}_t, \textrm{ where } \mathbf{W}_t \sim \textrm{MVN}(0,\mathbf{Q})}{x(t) = B x(t-1) + u(t) + G w(t), where W(t) ~ MVN(0,Q)} +#' \deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a}_t + \mathbf{H} \mathbf{v}_t, \textrm{ where } \mathbf{V}_t \sim \textrm{MVN}(0,\mathbf{R})}{y(t) = Z x(t) + a(t) + H v(t), where V(t) ~ MVN(0,R)} +#' \deqn{\mathbf{X}_1 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) \textrm{ or } \mathbf{X}_0 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) }{X(1) ~ MVN(x0, V0) or X(0) ~ MVN(x0, V0) } +#' Note, by default \eqn{\mathbf{V0}}{V0} is a matrix of all zeros and thus +#' \eqn{\mathbf{x}_1}{x(1)} or \eqn{\mathbf{x}_0}{x(0)} is treated as an +#' estimated parameter, not a diffuse prior. To remove clutter, the rest of +#' the parameters are shown as time-constant (no \eqn{t} subscript) but all +#' parameters can be time-varying. +#' +#' Note, "marss" is a model form. A model form is defined by a collection of +#' form functions discussed in [marssMODEL]. These functions are not exported +#' to the user, but are called by [MARSS()] using the argument `form`. These +#' internal functions convert the user's model list into the vec form of a +#' MARSS model and do extensive error-checking. +#' +#' @section Usage: +#' ``` +#' MARSS(y, +#' inits = NULL, +#' model = NULL, +#' miss.value = as.numeric(NA), +#' method = "kem", +#' form = "marxss", +#' fit = TRUE, +#' silent = FALSE, +#' control = NULL, +#' fun.kf = "MARSSkfas", +#' ...) +#' ``` +#' +#' @param MARSS.call A list of arguments from a [MARSS()] call with +#' `form="marss"`. +#' +#' @details +#' See the help page for the [MARSS.marxss()] form for details. +#' +#' @return +#' An object of class [marssMLE]. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. 
+#' +#' @seealso [marssMODEL], [MARSS.marxss()] +#' +#' @examples +#' \dontrun{ +#' # See the MARSS man page for examples +#' ?MARSS +#' +#' # and the Quick Examples chapter in the User Guide +#' RShowDoc("UserGuide", package = "MARSS") +#' } +#' +#' @keywords appendix +#' @export MARSS.marss <- function(MARSS.call) { # load need package globals common.allowed.in.MARSS.call <- get("common.allowed.in.MARSS.call", envir = pkg_globals) diff --git a/R/MARSS_marxss.R b/R/MARSS_marxss.R index b9fde11..37fe534 100644 --- a/R/MARSS_marxss.R +++ b/R/MARSS_marxss.R @@ -27,6 +27,201 @@ # Part 3 Do the conversion of marxss object to marss object ################################################################################### + +#' Multivariate AR-1 State-space Model with Inputs +#' +#' @description +#' The argument `form="marxss"` in a [MARSS()] function call specifies a +#' MAR-1 model with eXogenous variables model. This is a MARSS(1) model of +#' the form: +#' \deqn{\mathbf{x}_{t} = \mathbf{B}_t \mathbf{x}_{t-1} + \mathbf{u}_t + \mathbf{C}_t \mathbf{c}_t + \mathbf{G}_t \mathbf{w}_t, \textrm{ where } \mathbf{W}_t \sim \textrm{MVN}(0,\mathbf{Q}_t)}{x(t) = B(t) x(t-1) + u(t) + C(t) c(t) + G(t) w(t), where W(t) ~ MVN(0,Q(t))} +#' \deqn{\mathbf{y}_t = \mathbf{Z}_t \mathbf{x}_t + \mathbf{a}_t + \mathbf{D}_t \mathbf{d}_t + \mathbf{H}_t \mathbf{v}_t, \textrm{ where } \mathbf{V}_t \sim \textrm{MVN}(0,\mathbf{R}_t)}{y(t) = Z(t) x(t) + a(t) + D(t) d(t) + H(t) v(t), where V(t) ~ MVN(0,R(t))} +#' \deqn{\mathbf{X}_1 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) \textrm{ or } \mathbf{X}_0 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) }{X(1) ~ MVN(x0, V0) or X(0) ~ MVN(x0, V0) } +#' Note, by default \eqn{\mathbf{V0}}{V0} is a matrix of all zeros and thus +#' \eqn{\mathbf{x}_1}{x(1)} or \eqn{\mathbf{x}_0}{x(0)} is treated as an +#' estimated parameter not a diffuse prior. +#' +#' Note, "marxss" is a model form. 
A model form is defined by a collection of +#' form functions discussed in [marssMODEL]. These functions are not exported +#' to the user, but are called by [MARSS()] using the argument `form`. +#' +#' @section Usage: +#' ``` +#' MARSS(y, +#' inits = NULL, +#' model = NULL, +#' miss.value = as.numeric(NA), +#' method = "kem", +#' form = "marxss", +#' fit = TRUE, +#' silent = FALSE, +#' control = NULL, +#' fun.kf = "MARSSkfas", +#' ...) +#' ``` +#' +#' @param MARSS.call A list of arguments from a [MARSS()] call with +#' `form="marxss"`. +#' +#' @details +#' The allowed arguments when `form="marxss"` are 1) the arguments common to +#' all forms: "y" (data), "inits", "control", "method", "form", "fit", +#' "silent", "fun.kf" (see [MARSS()] for information on these arguments) and +#' 2) the argument "model" which is a list describing the MARXSS model (the +#' model list is described below). See the +#' [Quick Start Guide](https://cran.r-project.org/package=MARSS/vignettes/Quick_Start.html) +#' or the +#' [User Guide](https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf) +#' for examples. +#' +#' The argument `model` must be a list. The elements in the list specify the +#' structure for the \eqn{\mathbf{B}}{B}, \eqn{\mathbf{u}}{u}, +#' \eqn{\mathbf{C}}{C}, \eqn{\mathbf{c}}{c}, \eqn{\mathbf{Q}}{Q}, +#' \eqn{\mathbf{Z}}{Z}, \eqn{\mathbf{a}}{a}, \eqn{\mathbf{D}}{D}, +#' \eqn{\mathbf{d}}{d}, \eqn{\mathbf{R}}{R}, \eqn{\mathbf{x}_0}{x0}, and +#' \eqn{\mathbf{V}_0}{V0} in the MARXSS model (above). The list elements can +#' have the following values: +#' +#' * `Z` Default="identity". A text string, "identity","unconstrained", +#' "diagonal and unequal", "diagonal and equal", "equalvarcov", or +#' "onestate", or a length n vector of factors specifying which of the m +#' hidden state time series correspond to which of the n observation time +#' series. 
May be specified as a n x m list matrix for general specification +#' of both fixed and shared elements within the matrix. May also be +#' specified as a numeric n x m matrix to use a custom fixed +#' \eqn{\mathbf{Z}}{Z}. "onestate" gives a n x 1 matrix of 1s. +#' "identity","unconstrained", "diagonal and unequal", "diagonal and equal", +#' and "equalvarcov" all specify n x n matrices. +#' * `B` Default="identity". A text string, "identity", "unconstrained", +#' "diagonal and unequal", "diagonal and equal", "equalvarcov", "zero". Can +#' also be specified as a list matrix for general specification of both fixed +#' and shared elements within the matrix. May also be specified as a numeric +#' m x m matrix to use custom fixed \eqn{\mathbf{B}}{B}, but in this case +#' all the eigenvalues of \eqn{\mathbf{B}}{B} must fall in the unit circle. +#' * `U`, `x0` Default="unconstrained". A text string, "unconstrained", +#' "equal", "unequal" or "zero". May be specified as a m x 1 list matrix +#' for general specification of both fixed and shared elements within the +#' matrix. May also be specified as a numeric m x 1 matrix to use a custom +#' fixed \eqn{\mathbf{u}}{u} or \eqn{\mathbf{x}_0}{x(0)}. Notice that `U` +#' is capitalized in the `model` argument and output lists. +#' * `A` Default="scaling". A text string, "scaling","unconstrained", "equal", +#' "unequal" or "zero". May be specified as a n x 1 list matrix for general +#' specification of both fixed and shared elements within the matrix. May +#' also be specified as a numeric n x 1 matrix to use a custom fixed +#' \eqn{\mathbf{a}}{a}. Care must be taken when specifying `A` so that the +#' model is not under-constrained and unsolvable. The default, "scaling", +#' only applies to \eqn{\mathbf{Z}}{Z} matrices that are design matrices +#' (only 1s and 0s and all rows sum to 1). 
When a column in +#' \eqn{\mathbf{Z}}{Z} has multiple 1s, the first row in the +#' \eqn{\mathbf{a}}{a} matrix associated with those \eqn{\mathbf{Z}}{Z} +#' rows is 0 and the other associated \eqn{\mathbf{a}}{a} rows have an +#' estimated value. This is used to treat \eqn{\mathbf{a}}{a} as an +#' intercept where one intercept for each \eqn{\mathbf{x}}{x} (hidden +#' state) is fixed at 0 and any other intercepts associated with that +#' \eqn{\mathbf{x}}{x} have an estimated intercept. This ensures a solvable +#' model when \eqn{\mathbf{Z}}{Z} is a design matrix. Note in the model +#' argument and output, `A` is capitalized. +#' * `Q` Default="diagonal and unequal". A text string, "identity", +#' "unconstrained", "diagonal and unequal", "diagonal and equal", +#' "equalvarcov", "zero". May be specified as a list matrix for general +#' specification of both fixed and shared elements within the matrix. May +#' also be specified as a numeric g x g matrix to use a custom fixed matrix. +#' Default value of g is m, so \eqn{\mathbf{Q}}{Q} is a m x m matrix. g is +#' the number of columns in \eqn{\mathbf{G}}{G} (below). +#' * `R` Default="diagonal and equal". A text string, "identity", +#' "unconstrained", "diagonal and unequal", "diagonal and equal", +#' "equalvarcov", "zero". May be specified as a list matrix for general +#' specification of both fixed and shared elements within the matrix. May +#' also be specified as a numeric h x h matrix to use a custom fixed matrix. +#' Default value of h is n, so \eqn{\mathbf{R}}{R} is a n x n matrix. h is +#' the number of columns in \eqn{\mathbf{H}}{H} (below). +#' * `V0` Default="zero". A text string, "identity", "unconstrained", +#' "diagonal and unequal", "diagonal and equal", "equalvarcov", "zero". May +#' be specified as a list matrix for general specification of both fixed and +#' shared elements within the matrix. May also be specified as a numeric m x +#' m matrix to use a custom fixed matrix. 
+#' * `D` and `C` Default="zero". A text string, "identity", "unconstrained", +#' "diagonal and unequal", "diagonal and equal", "equalvarcov", "zero". Can +#' be specified as a list matrix for general specification of both fixed and +#' shared elements within the matrix. May also be specified as a numeric +#' matrix to use custom fixed values. Must have n rows +#' (\eqn{\mathbf{D}}{D}) or m rows (\eqn{\mathbf{C}}{C}). +#' * `d` and `c` Default="zero". Numeric matrix. No missing values allowed. +#' Must have 1 column or the same number of columns as the data, +#' \eqn{\mathbf{y}}{y}. The numbers of rows in \eqn{\mathbf{d}}{d} must be +#' the same as number of columns in \eqn{\mathbf{D}}{D}; similarly for +#' \eqn{\mathbf{c}}{c} and \eqn{\mathbf{C}}{C}. +#' * `G` and `H` Default="identity". A text string, "identity". Can be +#' specified as a numeric matrix or array for time-varying cases. Must have +#' m rows and g columns (\eqn{\mathbf{G}}{G}) or n rows and h columns +#' (\eqn{\mathbf{H}}{H}). g is the dim of \eqn{\mathbf{Q}}{Q} and h is the +#' dim of \eqn{\mathbf{R}}{R}. +#' * `tinitx` Default=0. Whether the initial state is specified at t=0 +#' (default) or t=1. +#' +#' All parameters except \eqn{\mathbf{x}_0}{x0} and \eqn{\mathbf{V}_0}{V0} +#' may be time-varying. If time-varying, then text shortcuts cannot be used. +#' Enter as an array with the 3rd dimension being time. Time dimension must be +#' 1 or equal to the number of time-steps in the data. See Quick Start guide +#' (`RShowDoc("Quick_Start",package="MARSS")`) or the User Guide +#' (`RShowDoc("UserGuide",package="MARSS")`) for examples. +#' +#' Valid model structures for `method="BFGS"` are the same as for +#' `method="kem"`. See [MARSSoptim()] for the allowed options for this method. +#' +#' The default estimation method, `method="kem"`, is the EM algorithm +#' described in the MARSS User Guide. 
The default settings for the control and +#' inits arguments are set via `MARSS:::alldefaults$kem` in +#' `MARSSsettings.R`. The defaults for the model argument are set in +#' `MARSS_marxss.R`. For this method, they are: +#' +#' * inits = list(B=1, U=0, Q=0.05, Z=1, A=0, R=0.05, x0=-99, V0=0.05, +#' G=0, H=0, L=0, C=0, D=0, c=0, d=0) +#' * model = list(Z="identity", A="scaling", R="diagonal and equal", +#' B="identity", U="unconstrained", Q="diagonal and unequal", +#' x0="unconstrained", V0="zero", C="zero", D="zero", +#' c=matrix(0,0,1), d=matrix(0,0,1), tinitx=0, diffuse=FALSE) +#' * control=list(minit=15, maxit=500, abstol=0.001, trace=0, sparse=FALSE, +#' safe=FALSE, allow.degen=TRUE, min.degen.iter=50, degen.lim=1.0e-04, +#' min.iter.conv.test=15, conv.test.deltaT=9, conv.test.slope.tol=0.5, +#' demean.states=FALSE). You can read about these in [MARSS()]. If you want +#' to speed up your fits, you can turn off most of the model checking using +#' `trace=-1`. +#' * fun.kf = "MARSSkfas". This sets the Kalman filter function to use. +#' `MARSSkfas()` is generally more stable as it uses Durbin & Koopman's +#' algorithm. But it may dramatically slow down when the data set is large +#' (more than 10 rows of data). Try the classic Kalman filter algorithm to +#' see if it runs faster by setting `fun.kf="MARSSkfss"`. You can read about +#' the two algorithms in [MARSSkf()]. +#' +#' For `method="BFGS"`, type `MARSS:::alldefaults$BFGS` to see the defaults. +#' +#' @return +#' An object of class [marssMLE]. See [print.marssMLE()] for a discussion of +#' the various output available for [marssMLE] objects (coefficients, +#' residuals, Kalman filter and smoother output, imputed values for missing +#' data, etc.). See [MARSSsimulate()] for simulating from [marssMLE] objects, +#' [MARSSboot()] for bootstrapping, [MARSSaic()] for calculation of various +#' AIC related model selection metrics, and [MARSSparamCIs()] for calculation +#' of confidence intervals and bias. 
See [plot.marssMLE()] for some default +#' plots of a model fit. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [marssMODEL], [MARSS.dfa()] +#' +#' @examples +#' \dontrun{ +#' # See the MARSS man page for examples +#' ?MARSS +#' +#' # and the Quick Examples chapter in the User Guide +#' RShowDoc("UserGuide", package = "MARSS") +#' } +#' +#' @keywords appendix +#' @export MARSS.marxss <- function(MARSS.call) { # load needed package globals common.allowed.in.MARSS.call <- get("common.allowed.in.MARSS.call", envir = pkg_globals) diff --git a/R/MARSSaic.R b/R/MARSSaic.R index e307641..adffb5e 100644 --- a/R/MARSSaic.R +++ b/R/MARSSaic.R @@ -4,6 +4,78 @@ # elements in output arg # samp.size, num.params ####################################################################################################### + +#' AIC for MARSS Models +#' +#' @description +#' Calculates AIC, AICc, a parametric bootstrap AIC (AICbp) and a +#' non-parametric bootstrap AIC (AICbb). If you simply want the AIC value for +#' a [marssMLE] object, you can use `AIC(fit)`. +#' +#' @param MLEobj An object of class [marssMLE]. This object must have a `$par` +#' element containing MLE parameter estimates from e.g. `MARSSkem()`. +#' @param output A vector containing one or more of the following: "AIC", +#' "AICc", "AICbp", "AICbb", "AICi", "boot.params". See Details. +#' @param Options A list containing: +#' * `nboot` Number of bootstraps (positive integer) +#' * `return.logL.star` Return the log-likelihoods for each bootstrap? (T/F) +#' * `silent` Suppress printing of the progress bar during AIC bootstraps? +#' (T/F) +#' +#' @details +#' When sample size is small, Akaike's Information Criterion (AIC) +#' under-penalizes more complex models. The most commonly used small sample +#' size corrector is AICc, which uses a penalty term of +#' \eqn{K n/(n-K-1)}, where \eqn{K} is the number of estimated parameters. 
+#' However, for time series models, AICc still under-penalizes complex models; +#' this is especially true for MARSS models. +#' +#' Two small-sample estimators specific for MARSS models have been developed. +#' Cavanaugh and Shumway (1997) developed a variant of bootstrapped AIC using +#' Stoffer and Wall's (1991) bootstrap algorithm ("AICbb"). Holmes and Ward +#' (2010) developed a variant on AICb ("AICbp") using a parametric bootstrap. +#' The parametric bootstrap permits AICb calculation when there are missing +#' values in the data, which Cavanaugh and Shumway's algorithm does not allow. +#' More recently, Bengtsson and Cavanaugh (2006) developed another +#' small-sample AIC estimator, AICi, based on fitting candidate models to +#' multivariate white noise. +#' +#' When the `output` argument passed in includes both `"AICbp"` and +#' `"boot.params"`, the bootstrapped parameters from `"AICbp"` will be added +#' to `MLEobj`. +#' +#' @return +#' Returns the [marssMLE] object that was passed in with additional AIC +#' components added on top as specified in the 'output' argument. +#' +#' @references +#' Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of +#' multivariate time-series using the MARSS package. NOAA Fisheries, Northwest +#' Fisheries Science Center, 2725 Montlake Blvd E., Seattle, WA 98112. Type +#' `RShowDoc("UserGuide",package="MARSS")` to open a copy. +#' +#' Bengtsson, T., and J. E. Cavanaugh. 2006. An improved Akaike information +#' criterion for state-space model selection. Computational Statistics & Data +#' Analysis 50:2635-2654. +#' +#' Cavanaugh, J. E., and R. H. Shumway. 1997. A bootstrap variant of AIC for +#' state-space model selection. Statistica Sinica 7:473-496. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. 
+#' +#' @seealso [MARSSboot()] +#' +#' @examples +#' dat <- t(harborSealWA) +#' dat <- dat[2:3, ] +#' kem <- MARSS(dat, model = list( +#' Z = matrix(1, 2, 1), +#' R = "diagonal and equal" +#' )) +#' kemAIC <- MARSSaic(kem, output = c("AIC", "AICc")) +#' +#' @export MARSSaic <- function(MLEobj, output = c("AIC", "AICc"), Options = list( nboot = 1000, return.logL.star = FALSE, silent = FALSE diff --git a/R/MARSSapplynames.R b/R/MARSSapplynames.R index 1542535..49cb7bc 100644 --- a/R/MARSSapplynames.R +++ b/R/MARSSapplynames.R @@ -1,3 +1,26 @@ +#' Names for marssMLE Object Components +#' +#' @description +#' Puts names on the par, start, par.se, init components of [marssMLE] +#' objects. This is a utility function in the **MARSS** package and is not +#' exported. +#' +#' @param MLEobj An object of class [marssMLE]. +#' +#' @details +#' The X.names and Y.names are attributes of [marssMODEL] objects (which +#' would be in `$marss` and `$model` in the [marssMLE] object). These names +#' are applied to the par elements in the [marssMLE] object. +#' +#' @return +#' The object passed in, with row and column names on matrices as specified. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. 
+#' +#' @seealso [marssMLE], [marssMODEL] +#' +#' @keywords internal MARSSapplynames <- function(MLEobj) { ## Helper function to put names on the elements in a marssMLE object if (!inherits(MLEobj, "marssMLE")) { diff --git a/R/MARSSboot.R b/R/MARSSboot.R index bfcc8fc..23aefae 100644 --- a/R/MARSSboot.R +++ b/R/MARSSboot.R @@ -4,6 +4,69 @@ # This is an MLE function and uses marssMLE objects # return(list(boot.params=boot.params, boot.data=boot.data, model=MLEobj, nboot=nboot, output=output, sim=sim, param.gen=param.gen, control=control)) ####################################################################################################### +#' Bootstrap MARSS Parameter Estimates +#' +#' @description +#' Creates bootstrap parameter estimates and simulated (or bootstrapped) data (if appropriate). This is a base function in the [MARSS-package]. +#' +#' @param MLEobj An object of class [marssMLE]. Must have a `$par` element containing MLE parameter estimates. +#' @param nboot Number of bootstraps to perform. +#' @param output Output to be returned: "data", "parameters" or "all". +#' @param sim Type of bootstrap: "parametric" or "innovations". See Details. +#' @param param.gen Parameter generation method: "hessian" or "MLE". +#' @param control The options in `MLEobj$control` are used by default. If supplied here, must contain all of the following: +#' * `max.iter`: Maximum number of EM iterations. +#' * `tol`: Optional tolerance for log-likelihood change. If log-likelihood decreases less than this amount relative to the previous iteration, the EM algorithm exits. +#' * `allow.degen`: Whether to try setting \eqn{\mathbf{Q}}{Q} or \eqn{\mathbf{R}}{R} elements to zero if they appear to be going to zero. +#' @param silent Suppresses printing of progress bar. +#' +#' @details +#' Approximate confidence intervals (CIs) on the model parameters can be calculated by the observed Fisher Information matrix (the Hessian of the negative log-likelihood function). 
The Hessian CIs (`param.gen="hessian"`) are based on the asymptotic normality of ML estimates under a large-sample approximation. CIs that are not based on asymptotic theory can be calculated using parametric and non-parametric bootstrapping (`param.gen="MLE"`). In this case, parameter estimates are generated by the ML estimates from each bootstrapped data set. The MLE method (kem or BFGS) is determined by `MLEobj$method`. +#' +#' Stoffer and Wall (1991) present an algorithm for generating CIs via a non-parametric bootstrap for state-space models (`sim = "innovations"`). The basic idea is that the Kalman filter can be used to generate estimates of the residuals of the model fit. These residuals are then standardized and resampled and used to generate bootstrapped data using the MARSS model and its maximum-likelihood parameter estimates. One of the limitations of the Stoffer and Wall algorithm is that it cannot be used when there are missing data, unless all data at time \eqn{t} are missing. An alternative approach is a parametric bootstrap (`sim = "parametric"`), in which the ML parameter estimates are used to produce bootstrapped data directly from the state-space model. +#' +#' @return +#' A list with the following components: +#' * `boot.params`: Matrix (number of params x nboot) of parameter estimates from the bootstrap. +#' * `boot.data`: Array (n x t x nboot) of simulated (or bootstrapped) data (if requested and appropriate). +#' * `marss`: The [marssMODEL] object (form="marss") that was passed in via `MLEobj$marss`. +#' * `nboot`: Number of bootstraps performed. +#' * `output`: Type of output returned. +#' * `sim`: Type of bootstrap. +#' * `param.gen`: Parameter generation method: "hessian" or "MLE". +#' +#' @references +#' Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. 
NOAA Fisheries, Northwest Fisheries Science Center, 2725 Montlake Blvd E., Seattle, WA 98112. Type `RShowDoc("UserGuide",package="MARSS")` to open a copy. +#' +#' Stoffer, D. S., and K. D. Wall. 1991. Bootstrapping state-space models: Gaussian maximum likelihood estimation and the Kalman filter. Journal of the American Statistical Association 86:1024-1033. +#' +#' Cavanaugh, J. E., and R. H. Shumway. 1997. A bootstrap variant of AIC for state-space model selection. Statistica Sinica 7:473-496. +#' +#' @author +#' Eli Holmes and Eric Ward, NOAA, Seattle, USA. +#' +#' @seealso +#' [marssMLE], [marssMODEL], [MARSSaic()], [MARSShessian()], [MARSSFisherI()] +#' +#' @examples +#' # nboot is set low in these examples in order to run quickly +#' # normally nboot would be >1000 at least +#' dat <- t(kestrel) +#' dat <- dat[2:3, ] +#' # maxit set low to speed up the example +#' kem <- MARSS(dat, +#' model = list(U = "equal", Q = diag(.01, 2)), +#' control = list(maxit = 50) +#' ) +#' # bootstrap parameters from a Hessian matrix +#' hess.list <- MARSSboot(kem, param.gen = "hessian", nboot = 4) +#' +#' # from resampling the innovations (no missing values allowed) +#' boot.innov.list <- MARSSboot(kem, output = "all", sim = "innovations", nboot = 4) +#' +#' # bootstrapped parameter estimates +#' hess.list$boot.params +#' @export MARSSboot <- function(MLEobj, nboot = 1000, output = "parameters", sim = "parametric", param.gen = "MLE", control = NULL, silent = FALSE) { diff --git a/R/MARSSharveyobsFI.R b/R/MARSSharveyobsFI.R index 1eb3960..e5953d0 100644 --- a/R/MARSSharveyobsFI.R +++ b/R/MARSSharveyobsFI.R @@ -4,6 +4,42 @@ # With modification for missing values # Reference Holmes, E. E. (2014). Computation of standardized residuals for (MARSS) models. Technical Report. 
arXiv:1411.0045 [stat.ME] #################################################################################### +#' Hessian Matrix via the Harvey (1989) Recursion +#' +#' @description +#' Calculates the observed Fisher Information analytically via the recursion by Harvey (1989) as adapted by Holmes (2017) for MARSS models with linear constraints. This is the same as the Hessian of the negative log-likelihood function at the MLEs. This is a utility function in the [MARSS-package] and is not exported. Use [MARSShessian()] to access. +#' +#' @param MLEobj An object of class [marssMLE]. This object must have a `$par` element containing MLE parameter estimates from e.g. [MARSSkem]. +#' +#' @return +#' The observed Fisher Information matrix computed via equation 3.4.69 in Harvey (1989). The differentials in the equation are computed in the recursion in equations 3.4.73a to 3.4.74b. See Holmes (2016c) for a discussion of the Harvey (1989) algorithm and Holmes (2017) for the specific implementation of the algorithm for MARSS models with linear constraints. +#' +#' Harvey (1989) discusses missing observations in section 3.4.7. However, the `MARSSharveyobsFI()` function implements the approach of Shumway and Stoffer (2006) in section 6.4 for the missing values. See Holmes (2012) for a full discussion of the missing values modifications. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSShessian()], [MARSSparamCIs()] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[c(2, 11), ] +#' fit <- MARSS(dat) +#' MARSS:::MARSSharveyobsFI(fit) +#' +#' @references +#' R. H. Shumway and D. S. Stoffer (2006). Section 6.4 (Missing Data Modifications) in Time series analysis and its applications. Springer-Verlag, New York. +#' +#' Harvey, A. C. (1989) Section 3.4.5 (Information matrix) in Forecasting, structural time series models and the Kalman filter. Cambridge University Press, Cambridge, UK. +#' +#' See also J. E. Cavanaugh and R. H. 
Shumway (1996) On computing the expected Fisher information matrix for state-space model parameters. Statistics & Probability Letters 26: 347-355. This paper discusses the Harvey (1989) recursion (and proposes an alternative). +#' +#' Holmes, E. E. (2012). Derivation of the EM algorithm for constrained and unconstrained multivariate autoregressive state-space (MARSS) models. Technical Report. arXiv:1302.3919 [stat.ME] +#' +#' Holmes, E. E. 2016c. Notes on computing the Fisher Information matrix for MARSS models. Part III Overview of Harvey 1989. +#' +#' Holmes, E. E. 2017. Notes on computing the Fisher Information matrix for MARSS models. Part IV Implementing the Recursion in Harvey 1989. +#' @keywords internal MARSSharveyobsFI <- function(MLEobj) { paramvector <- MARSSvectorizeparam(MLEobj) par.names <- names(paramvector) diff --git a/R/MARSShatyt.r b/R/MARSShatyt.r index 0a00625..688de6a 100644 --- a/R/MARSShatyt.r +++ b/R/MARSShatyt.r @@ -2,6 +2,50 @@ # MARSShatyt function # Expectations involving hatyt ####################################################################################################### +#' Compute Expected Value of Y, YY, and YX +#' +#' @description +#' Computes the expected value of random variables involving \eqn{\mathbf{Y}}{Y}. Users can use [tsSmooth.marssMLE] or `print( MLEobj, what="Ey")` to access this output. See [print.marssMLE]. +#' +#' @param MLEobj A [marssMLE] object with the `par` element of estimated parameters, `model` element with the model description and data. +#' @param only.kem If TRUE, return only `ytT`, `OtT`, `yxtT`, and `yxttpT` (values conditioned on the data from \eqn{1:T}) needed for the EM algorithm. 
If `only.kem=FALSE`, then also return values conditioned on data from 1 to \eqn{t-1} (`Ott1` and `ytt1`) and 1 to \eqn{t} (`Ott` and `ytt`), `yxtt1T` (\eqn{\textrm{var}[\mathbf{Y}_t, \mathbf{X}_{t-1}|\mathbf{y}_{1:T}]}{var[Y(t),X(t-1)|1:T]}), `var.ytT` (\eqn{\textrm{var}[\mathbf{Y}_t|\mathbf{y}_{1:T}]}{var[Y(t)|1:T]}), and `var.EytT` (\eqn{\textrm{var}_X[E_{Y|x}[\mathbf{Y}_t|\mathbf{y}_{1:T},\mathbf{x}_t]]}{var_X[E_{Y|x}[Y(t)|1:T,x(t)]]}). +#' +#' @details +#' For state space models, `MARSShatyt()` computes the expectations involving \eqn{\mathbf{Y}}{Y}. If \eqn{\mathbf{Y}}{Y} is completely observed, this entails simply replacing \eqn{\mathbf{Y}}{Y} with the observed \eqn{\mathbf{y}}{y}. When \eqn{\mathbf{Y}}{Y} is only partially observed, the expectation involves the conditional expectation of a multivariate normal. +#' +#' @return +#' A list with the following components (n is the number of observation time series and m is the number of state processes). Following the notation in Holmes (2012), \eqn{\mathbf{y}(1)}{y(1)} is the observed data (for \eqn{t=1:T}) while \eqn{\mathbf{y}(2)}{y(2)} is the unobserved data. \eqn{\mathbf{y}(1,1:t-1)}{y(1,1:t-1)} is the observed data from time 1 to \eqn{t-1}. +#' * `ytT`: E[Y(t) | Y(1,1:T)=y(1,1:T)] (n x T matrix). +#' * `ytt1`: E[Y(t) | Y(1,1:t-1)=y(1,1:t-1)] (n x T matrix). +#' * `ytt`: E[Y(t) | Y(1,1:t)=y(1,1:t)] (n x T matrix). +#' * `OtT`: E[Y(t) t(Y(t)) | Y(1,1:T)=y(1,1:T)] (n x n x T array). +#' * `var.ytT`: var[Y(t) | Y(1,1:T)=y(1,1:T)] (n x n x T array). +#' * `var.EytT`: var_X[E_Y[Y(t) | Y(1,1:T)=y(1,1:T), X(t)=x(t)]] (n x n x T array). +#' * `Ott1`: E[Y(t) t(Y(t)) | Y(1,1:t-1)=y(1,1:t-1)] (n x n x T array). +#' * `var.ytt1`: var[Y(t) | Y(1,1:t-1)=y(1,1:t-1)] (n x n x T array). +#' * `var.Eytt1`: var_X[E_Y[Y(t) | Y(1,1:t-1)=y(1,1:t-1), X(t)=x(t)]] (n x n x T array). +#' * `Ott`: E[Y(t) t(Y(t)) | Y(1,1:t)=y(1,1:t)] (n x n x T array). +#' * `yxtT`: E[Y(t) t(X(t)) | Y(1,1:T)=y(1,1:T)] (n x m x T array). 
+#' * `yxtt1T`: E[Y(t) t(X(t-1)) | Y(1,1:T)=y(1,1:T)] (n x m x T array). +#' * `yxttpT`: E[Y(t) t(X(t+1)) | Y(1,1:T)=y(1,1:T)] (n x m x T array). +#' * `errors`: Any error messages due to ill-conditioned matrices. +#' * `ok`: (TRUE/FALSE) Whether errors were generated. +#' +#' @references +#' Holmes, E. E. (2012) Derivation of the EM algorithm for constrained and unconstrained multivariate autoregressive state-space (MARSS) models. Technical report. arXiv:1302.3919 [stat.ME] Type `RShowDoc("EMDerivation",package="MARSS")` to open a copy. See the section on 'Computing the expectations in the update equations' and the subsections on expectations involving Y. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso +#' [MARSS()], [marssMODEL], [MARSSkem()] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[2:3, ] +#' fit <- MARSS(dat) +#' EyList <- MARSShatyt(fit) +#' @export MARSShatyt <- function(MLEobj, only.kem = TRUE) { MODELobj <- MLEobj[["marss"]] if (!is.null(MLEobj[["kf"]])) { diff --git a/R/MARSShessian.R b/R/MARSShessian.R index 468700a..c39ec56 100644 --- a/R/MARSShessian.R +++ b/R/MARSShessian.R @@ -1,6 +1,50 @@ # Attaches Hessian, parSigma and parMean to MLEobj # Computed at the values in MLEobj$par # For confidence intervals, this should be the MLEs +#' Parameter Variance-Covariance Matrix from the Hessian Matrix +#' +#' @description +#' Calculates an approximate parameter variance-covariance matrix for the parameters using an inverse of the Hessian of the negative log-likelihood function at the MLEs (the observed Fisher Information matrix). It appends `$Hessian`, `$parMean`, `$parSigma` to the [marssMLE] object. +#' +#' @param MLEobj An object of class [marssMLE]. This object must have a `$par` element containing MLE parameter estimates from e.g. [MARSSkem]. +#' @param method The method to use for computing the Hessian. 
Options are `Harvey1989` to use the Harvey (1989) recursion, which is an analytical solution, `fdHess` or `optim` which are two numerical methods. Although `optim` can be passed to this function, in the internal functions which call this function, `fdHess` will be used if a numerical estimate is requested. +#' +#' @details +#' See [MARSSFisherI] for a discussion of the observed Fisher Information matrix and references. +#' +#' Method `fdHess` uses [nlme::fdHess] from package nlme to numerically estimate the Hessian matrix (the matrix of partial 2nd derivatives of the negative log-likelihood function at the MLE). Method `optim` uses [optim] with `hessian=TRUE` and `list(maxit=0)` to ensure that the Hessian is computed at the values in the `par` element of the MLE object. Method `Harvey1989` (the default) uses the recursion in Harvey (1989) to compute the observed Fisher Information of a MARSS model analytically. +#' +#' Note that the parameter confidence intervals computed with the observed Fisher Information matrix are based on the asymptotic normality of maximum-likelihood estimates under a large-sample approximation. +#' +#' @return +#' `MARSShessian()` attaches `Hessian`, `parMean` and `parSigma` to the [marssMLE] object that is passed into the function. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSSFisherI()], [MARSSharveyobsFI()], [MARSShessian.numerical()], [MARSSparamCIs()], [marssMLE] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[c(2, 11), ] +#' MLEobj <- MARSS(dat) +#' MLEobj.hessian <- MARSShessian(MLEobj) +#' +#' # show the approx Hessian +#' MLEobj.hessian$Hessian +#' +#' # generate a parameter sample using the Hessian +#' # this uses the rmvnorm function in the mvtnorm package +#' hess.params <- mvtnorm::rmvnorm(1, +#' mean = MLEobj.hessian$parMean, +#' sigma = MLEobj.hessian$parSigma +#' ) +#' +#' @references +#' Harvey, A. C. 
(1989) Section 3.4.5 (Information matrix) in Forecasting, structural time series models and the Kalman filter. Cambridge University Press, Cambridge, UK. +#' +#' See also J. E. Cavanaugh and R. H. Shumway (1996) On computing the expected Fisher information matrix for state-space model parameters. Statistics & Probability Letters 26: 347-355. This paper discusses the Harvey (1989) recursion (and proposes an alternative). +#' @export MARSShessian <- function(MLEobj, method = c("Harvey1989", "fdHess", "optim")) { method <- match.arg(method) paramvec <- MARSSvectorizeparam(MLEobj) diff --git a/R/MARSShessian_numerical.R b/R/MARSShessian_numerical.R index 707ad0a..ac7d773 100644 --- a/R/MARSShessian_numerical.R +++ b/R/MARSShessian_numerical.R @@ -8,6 +8,31 @@ # # Adds Hessian, parameter var-cov matrix, and parameter mean to a marssMLE object ####################################################################################################### +#' Hessian Matrix via Numerical Approximation +#' +#' @description +#' Calculates the Hessian of the log-likelihood function at the MLEs using either the [nlme::fdHess] function in the nlme package or the [optim] function. This is a utility function in the [MARSS-package] and is not exported. Use [MARSShessian] to access. +#' +#' @param MLEobj An object of class [marssMLE]. This object must have a `$par` element containing MLE parameter estimates from e.g. [MARSSkem]. +#' @param fun The function to use for computing the Hessian. Options are 'fdHess' or 'optim'. +#' +#' @details +#' Method `fdHess` uses [nlme::fdHess] from package nlme to numerically estimate the Hessian matrix (the matrix of partial 2nd derivatives) of the negative log-likelihood function with respect to the parameters. Method `optim` uses [optim] with `hessian=TRUE` and `list(maxit=0)` to ensure that the Hessian is computed at the values in the `par` element of the MLE object. 
+#' +#' @return +#' The numerically estimated Hessian of the log-likelihood function at the maximum likelihood estimates. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSSharveyobsFI()], [MARSShessian()], [MARSSparamCIs()] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[c(2, 11), ] +#' MLEobj <- MARSS(dat) +#' MARSS:::MARSShessian.numerical(MLEobj) +#' @keywords internal MARSShessian.numerical <- function(MLEobj, fun = c("fdHess", "optim")) { fun <- match.arg(fun) kfNLL <- function(x, MLEobj = NULL) { # NULL assignment needed for optim call syntax diff --git a/R/MARSSinfo.R b/R/MARSSinfo.R index 99e433d..e6bc53d 100644 --- a/R/MARSSinfo.R +++ b/R/MARSSinfo.R @@ -1,3 +1,20 @@ +#' MARSS Error Messages and Warnings +#' +#' @description +#' Prints out more information for MARSS error messages and warnings. +#' +#' @param number An error or warning message number. +#' +#' @return +#' A print out of information. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @examples +#' # Show all the info options +#' MARSSinfo() +#' @export MARSSinfo <- function(number) { if (missing(number)) { cat("Pass in a single label (in quotes) to get info on a MARSS error or warning message. diff --git a/R/MARSSinits.R b/R/MARSSinits.R index 5f114da..fd28a1d 100644 --- a/R/MARSSinits.R +++ b/R/MARSSinits.R @@ -4,6 +4,38 @@ ## Will return a par list that looks just like MLEobj par list ## Wants either a scalar (dim=NULL) or a matrix the same size as $par[[elem]] or a marssMLE object with the par element +#' Initial Values for MLE +#' +#' @description +#' Sets up generic starting values for parameters for maximum-likelihood estimation algorithms that use an iterative maximization routine needing starting values. Examples of such algorithms are the EM algorithm in [MARSSkem()] and Newton methods in [MARSSoptim()]. This is a utility function in the [MARSS-package]. It is not exported to the user. 
Users looking for information on specifying initial conditions should look at the help file for [MARSS()] and the User Guide section on initial conditions. +#' +#' The function assumes that the user passed in the inits list using the parameter names in whatever form was specified in the [MARSS()] call. The default is form="marxss". The [MARSSinits()] function calls MARSSinits_foo, where foo is the form specified in the [MARSS()] call. MARSSinits_foo translates the inits list in form foo into form marss. +#' +#' @param MLEobj An object of class [marssMLE]. +#' @param inits A list of column vectors (matrices with one column) of the estimated values in each parameter matrix. +#' +#' @details +#' Creates an `inits` parameter list for use by iterative maximization algorithms. +#' +#' Default values for `inits` are supplied in `MARSSsettings.R`. The user can alter these and supply any of the following (m is the dim of X and n is the dim of Y in the MARSS model): +#' +#' * `elem=A,U` A numeric vector or matrix which will be constructed into `inits$elem` by the command `array(inits$elem, dim=c(n or m,1))`. If elem is fixed in the model, any `inits$elem` values will be overridden and replaced with the fixed value. Default is `array(0,dim=c(n or m,1))`. +#' * `elem=Q,R,B` A numeric vector or matrix. If length equals the length of `MODELobj$fixed$elem` then `inits$elem` will be constructed by `array(inits$elem, dim=dim(MODELobj$fixed$elem))`. If length is 1 or equals dim of `Q` or dim of `R` then `inits$elem` will be constructed into a diagonal matrix by the command `diag(inits$elem)`. If elem is fixed in the model, any `inits$elem` values will be overridden and replaced with the fixed value. Default is `diag(0.05, dim of Q or R)` for `Q` and `R`. Default is `diag(1,m)` for `B`. +#' * `x0` If `inits$x0=-99`, then starting values for `x0` are estimated by a linear regression through the count data assuming `A` is all zero. This will be a poor start if `inits$A` is not 0. 
If `inits$x0` is a numeric vector or matrix, `inits$x0` will be constructed by the command `array(inits$x0, dim=c(m,1))`. If `x0` is fixed in the model, any `inits$x0` values will be overridden and replaced with the fixed value. Default is `inits$x0=-99`. +#' * `Z` If `Z` is fixed in the model, `inits$Z` is set to the fixed value. If `Z` is not fixed, then the user must supply `inits$Z`. There is no default. +#' * `elem=V0` `V0` is never estimated, so this is never used. +#' +#' @return +#' A list with initial values for the estimated values for each parameter matrix in a MARSS model in marss form. So this will be a list with elements `B`, `U`, `Q`, `Z`, `A`, `R`, `x0`, `V0`, `G`, `H`, `L`. +#' +#' @note +#' Within the base code, a form-specific internal `MARSSinits` function is called to allow the output to vary based on form: `MARSSinits_dfa`, `MARSSinits_marss`, `MARSSinits_marxss`. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [marssMODEL], [MARSSkem()], [MARSSoptim()] +#' @export MARSSinits <- function(MLEobj, inits = list(B = 1, U = 0, Q = 0.05, Z = 1, A = 0, R = 0.05, x0 = -99, V0 = 5, G = 0, H = 0, L = 0)) { MODELobj <- MLEobj[["marss"]] method <- MLEobj[["method"]] diff --git a/R/MARSSinnovationsboot.R b/R/MARSSinnovationsboot.R index 2a5452e..6daa086 100644 --- a/R/MARSSinnovationsboot.R +++ b/R/MARSSinnovationsboot.R @@ -4,6 +4,58 @@ # It creates bootstrap data via sampling from the standardized innovations matrix # In the MARSS code, this is referred to as the nonparametric bootstrap. Strictly speaking, it is not nonparametric. ######################################################################################################################## +#' Bootstrapped Data using Stoffer and Wall's Algorithm +#' +#' @description +#' Creates bootstrap data via sampling from the standardized innovations matrix. +#' This is an internal function in the **MARSS** package and is not exported. +#' Users should access this with [MARSSboot()]. 
+#' +#' @param MLEobj An object of class [marssMLE]. This object must have a `$par` +#' element containing MLE parameter estimates from e.g. [MARSSkem()] or +#' [MARSS()]. This algorithm may not be used if there are missing datapoints +#' in the data. +#' @param nboot Number of bootstraps to perform. +#' @param minIndx Number of innovations to skip. Stoffer & Wall suggest not +#' sampling from innovations 1-3. +#' +#' @details +#' Stoffer and Wall (1991) present an algorithm for generating CIs via a +#' non-parametric bootstrap for state-space models. The basic idea is that the +#' Kalman filter can be used to generate estimates of the residuals of the model +#' fit. These residuals are then standardized and resampled and used to generate +#' bootstrapped data using the MARSS model and its maximum-likelihood parameter +#' estimates. One of the limitations of the Stoffer and Wall algorithm is that +#' it cannot be used when there are missing data, unless all data at time +#' \eqn{t} are missing. +#' +#' @return +#' A list containing the following components: +#' +#' * `boot.states`: Array (dim is m x tSteps x nboot) of simulated state processes. +#' * `boot.data`: Array (dim is n x tSteps x nboot) of simulated data. +#' * `marss`: [marssMODEL] object element of the [marssMLE] object (`marssMLE$marss`) in "marss" form. +#' * `nboot`: Number of bootstraps performed. +#' +#' m is the number of state processes (x in the MARSS model) and n is the number +#' of observation time series (y in the MARSS model). +#' +#' @references +#' Stoffer, D. S., and K. D. Wall. 1991. Bootstrapping state-space models: +#' Gaussian maximum likelihood estimation and the Kalman filter. Journal of +#' the American Statistical Association 86:1024-1033. +#' +#' @author +#' Eli Holmes and Eric Ward, NOAA, Seattle, USA. 
+#' +#' @seealso [stdInnov()], [MARSSparamCIs()], [MARSSboot()] +#' +#' @examples +#' dat <- t(kestrel) +#' dat <- dat[2:3, ] +#' fit <- MARSS(dat, model = list(U = "equal", Q = diag(.01, 2))) +#' boot.obj <- MARSSinnovationsboot(fit) +#' @export MARSSinnovationsboot <- function(MLEobj, nboot = 1000, minIndx = 3) { if (any(is.na(MLEobj$marss$data))) { stop("Stopped in MARSSinnovationsboot() because this algorithm resamples from the innovations and doesn't allow missing values.\n", call. = FALSE) @@ -88,6 +140,39 @@ MARSSinnovationsboot <- function(MLEobj, nboot = 1000, minIndx = 3) { ###################################################################################################################### # stdInnov ###################################################################################################################### +#' Standardized Innovations +#' +#' @description +#' Standardizes Kalman filter innovations. This is a helper function called by +#' [MARSSinnovationsboot()] in the **MARSS** package. Not exported. +#' +#' @param SIGMA n x n x T array of Kalman filter innovations variances. This is +#' output from [MARSSkf()]. +#' @param INNOV n x T matrix of Kalman filter innovations. This is output from +#' [MARSSkf()]. +#' +#' @details +#' n = number of observation (y) time series. T = number of time steps in the +#' time series. +#' +#' @return +#' n x T matrix of standardized innovations. +#' +#' @references +#' Stoffer, D. S., and K. D. Wall. 1991. Bootstrapping state-space models: +#' Gaussian maximum likelihood estimation and the Kalman filter. Journal of +#' the American Statistical Association 86:1024-1033. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. 
+#' +#' @seealso [MARSSboot()], [MARSSkf()], [MARSSinnovationsboot()] +#' +#' @examples +#' \dontrun{ +#' std.innovations <- stdInnov(kfList$Sigma, kfList$Innov) +#' } +#' @keywords internal stdInnov <- function(SIGMA, INNOV) { # This function added by EW Nov 3, 2008 # SIGMA is covariance matrix, E are original innovations diff --git a/R/MARSSkem.r b/R/MARSSkem.r index 0fa7e17..3f16fea 100644 --- a/R/MARSSkem.r +++ b/R/MARSSkem.r @@ -3,6 +3,90 @@ # Minimal error checking is done. You should run is.marssMLE(MLEobj) before calling this. # Maximization using an EM algorithm with Kalman filter ####################################################################################################### +#' EM Algorithm function for MARSS models +#' +#' @description +#' `MARSSkem()` performs maximum-likelihood estimation, using an EM algorithm for constrained and unconstrained MARSS models. Users would not call this function directly normally. The function [MARSS()] calls `MARSSkem()`. However users might want to use `MARSSkem()` directly if they need to avoid some of the error-checking overhead associated with the [MARSS()] function. +#' +#' @param MLEobj An object of class [marssMLE]. +#' +#' @details +#' Objects of class [marssMLE] may be built from scratch but are easier to construct using [MARSS()] with `MARSS(..., fit=FALSE)`. +#' +#' Options for `MARSSkem()` may be set using `MLEobj$control`. The commonly used elements of `control` are as follows (see [marssMLE]): +#' +#' * `minit`: Minimum number of EM iterations. You can use this to force the algorithm to do a certain number of iterations. This is helpful if your solution is not converging. +#' * `maxit`: Maximum number of EM iterations. +#' * `min.iter.conv.test`: The minimum number of iterations before the log-log convergence test will be computed. If `maxit` is set less than this, then convergence will not be computed (and the algorithm will just run for maxit iterations). 
+#' * `kf.x0`: Whether to set the prior at \eqn{t=0} (`"x00"`) or at \eqn{t=1} (`"x10"`). The default is `"x00"`. +#' * `conv.test.deltaT`: The number of iterations to use in the log-log convergence test. This defaults to 9. +#' * `abstol`: Tolerance for log-likelihood change for the delta logLik convergence test. If log-likelihood changes less than this amount relative to the previous iteration, the EM algorithm exits. This is normally (default) set to NULL and the log-log convergence test is used instead. +#' * `allow.degen`: Whether to try setting \eqn{\mathbf{Q}}{Q} or \eqn{\mathbf{R}}{R} elements to zero if they appear to be going to zero. +#' * `trace`: A positive integer. If not 0, a record will be created of each variable over all EM iterations and detailed warning messages (if appropriate) will be printed. +#' * `safe`: If TRUE, `MARSSkem` will rerun [MARSSkf()] after each individual parameter update rather than only after all parameters are updated. The former is slower and unnecessary for many models, but in some cases, the safer and slower algorithm is needed because the ML parameter matrices have high condition numbers. +#' * `silent`: Suppresses printing of progress bars, error messages, warnings and convergence information. +#' +#' @return +#' The [marssMLE] object which was passed in, with additional components: +#' * `method`: String "kem". +#' * `kf`: Kalman filter output. +#' * `iter.record`: If `MLEobj$control$trace = TRUE`, a list with `par` = a record of each estimated parameter over all EM iterations and `logLik` = a record of the log likelihood at each iteration. +#' * `numIter`: Number of iterations needed for convergence. +#' * `convergence`: Did estimation converge successfully? +#' * `convergence=0`: Converged in both the abstol test and the log-log plot test. +#' * `convergence=1`: Some of the parameter estimates did not converge (based on the log-log plot test AND abstol tests) before `MLEobj$control$maxit` was reached. 
This is not an error per se. +#' * `convergence=3`: No convergence diagnostics were computed because all parameters were fixed thus no fitting required. +#' * `convergence=-1`: No convergence diagnostics were computed because the MLE object was not fit (called with fit=FALSE). This isn't a convergence error just information. There is no par element so no functions can be run with the object. +#' * `convergence=2`: No convergence diagnostics were computed because the MLE object had problems and was not fit. This isn't a convergence error just information. +#' * `convergence=10`: Abstol convergence only. Some of the parameter estimates did not converge (based on the log-log plot test) before `MLEobj$control$maxit` was reached. However `MLEobj$control$abstol` was reached. +#' * `convergence=11`: Log-log convergence only. Some of the parameter estimates did not converge (based on the abstol test) before `MLEobj$control$maxit` was reached. However the log-log convergence test was passed. +#' * `convergence=12`: Abstol convergence only. Log-log convergence test was not computed because `MLEobj$control$maxit` was set to less than `control$min.iter.conv.test`. +#' * `convergence=13`: Lack of convergence info. Parameter estimates did not converge based on the abstol test before `MLEobj$control$maxit` was reached. No log-log information since `MLEobj$control$maxit` is less than `control$min.iter.conv.test` so no log-log plot test could be done. +#' * `convergence=42`: `MLEobj$control$abstol` was reached but the log-log plot test returned NAs. This is an odd error and you should set `control$trace=TRUE` and look at the outputted `$iter.record` to see what is wrong. +#' * `convergence=52`: The EM algorithm was abandoned due to numerical errors. Usually this means one of the variances either went to zero or to all elements being equal. This is not an error per se. Most likely it means that your model is not very good for your data (too inflexible or too many parameters). 
Try setting `control$trace=1` to view a detailed error report. +#' * `convergence=53`: The algorithm was abandoned due to numerical errors in the likelihood calculation from [MARSSkf()]. +#' * `convergence=62`: The algorithm was abandoned due to errors in the log-log convergence test. You should not get this error (it is included for debugging purposes to catch improper arguments passed into the log-log convergence test). +#' * `convergence=63`: The algorithm was run for `control$maxit` iterations, `control$abstol` not reached, and the log-log convergence test returned errors. You should not get this error (it is included for debugging purposes to catch improper arguments passed into the log-log convergence test). +#' * `convergence=72`: Other convergence errors. This is included for debugging purposes to catch misc. errors. +#' * `logLik`: Log-likelihood. +#' * `states`: State estimates from the Kalman smoother. +#' * `states.se`: Confidence intervals based on state standard errors, see caption of Fig 6.3 (p. 337) in Shumway & Stoffer (2006). +#' * `errors`: Any error messages. +#' +#' @section Discussion: +#' To ensure that the global maximum-likelihood values are found, it is recommended that you test the fit under different initial parameter values, particularly if the model is not a good fit to the data. This requires more computation time, but reduces the chance of the algorithm terminating at a local maximum and not reaching the true MLEs. For many models and for draft analyses, this is unnecessary, but answers should be checked using an initial conditions search before reporting final values. See the chapter on initial conditions in the User Guide for a discussion on how to do this. +#' +#' `MARSSkem()` calls a Kalman filter/smoother [MARSSkf()] for hidden state estimation. The algorithm allows two options for the initial state conditions: fixed but unknown or a prior. 
In the first case, x0 (whether at t=0 or t=1) is treated as fixed but unknown (estimated); in this case, `fixed$V0=0` and x0 is estimated. This is the default behavior. In the second case, the initial conditions are specified with a prior and V0!=0. In the latter case, x0 or V0 may be estimated. MARSS will allow you to try to estimate both, but many researchers have noted that this is not robust so you should fix one or the other. +#' +#' If you get errors, you can type [MARSSinfo()] for help. Fitting problems often mean that the solution involves an ill-conditioned matrix. For example, your \eqn{\mathbf{Q}}{Q} or \eqn{\mathbf{R}}{R} matrix is going to a value in which all elements have the same value, for example zero. If for example, you tried to fit a model with a fixed \eqn{\mathbf{R}}{R} matrix with high values on the diagonal and the variance in that \eqn{\mathbf{R}}{R} matrix (diagonal terms) was much higher than what is actually in the data, then you might drive \eqn{\mathbf{Q}}{Q} to zero. Also if you try to fit a structurally inadequate model, then it is not unusual that \eqn{\mathbf{Q}}{Q} will be driven to zero. For example, if you fit a model with 1 hidden state trajectory to data that clearly have 2 quite different hidden state trajectories, you might have this problem. Comparing the likelihood of this model to a model with more structural flexibility should reveal that the structurally inflexible model is inadequate (much lower likelihood). +#' +#' Convergence testing is done via a combination of two tests. The first test (abstol test) is the test that the change in the absolute value of the log-likelihood from one iteration to another is less than some tolerance value (abstol). The second test (log-log test) is that the slope of a plot of the log of the parameter value or log-likelihood versus the log of the iteration number is less than some tolerance. Both of these must be met to generate the Success! parameters converged output. 
If you want to circumvent one of these tests, then set the tolerance for the unwanted test to be high. That will guarantee that that test is met before the convergence test you want to use is met. The tolerance for the abstol test is set by `control$abstol` and the tolerance for the log-log test is set by `control$conv.test.slope.tol`. Anything over 1 is huge for both of these. +#' +#' @references +#' R. H. Shumway and D. S. Stoffer (2006). Chapter 6 in Time series analysis and its applications. Springer-Verlag, New York. +#' +#' Ghahramani, Z. and Hinton, G. E. (1996) Parameter estimation for linear dynamical systems. Technical Report CRG-TR-96-2, University of Toronto, Dept. of Computer Science. +#' +#' Harvey, A. C. (1989) Chapter 5 in Forecasting, structural time series models and the Kalman filter. Cambridge University Press, Cambridge, UK. +#' +#' The MARSS User Guide: Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. NOAA Fisheries, Northwest Fisheries Science Center, 2725 Montlake Blvd E., Seattle, WA 98112 Go to [User Guide](https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf) to open the most recent version. +#' +#' Holmes, E. E. (2012). Derivation of the EM algorithm for constrained and unconstrained multivariate autoregressive state-space (MARSS) models. Technical Report. arXiv:1302.3919 [stat.ME] [EMDerivation](https://cran.r-project.org/package=MARSS/vignettes/EMDerivation.pdf) has the most recent version. +#' +#' @author +#' Eli Holmes and Eric Ward, NOAA, Seattle, USA. +#' +#' @seealso +#' [MARSSkf()], [marssMLE], [MARSSoptim()], [MARSSinfo()] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[2:4, ] +#' # you can use MARSS to construct a proper marssMLE object. +#' fit <- MARSS(dat, model = list(Q = "diagonal and equal", U = "equal"), fit = FALSE) +#' # Pass this marssMLE object to MARSSkem to do the fit. 
+#' kemfit <- MARSSkem(fit) +#' @export MARSSkem <- function(MLEobj) { MODELobj <- MLEobj[["marss"]] # This is a core function and does not check if user specified a legal or solveable model. diff --git a/R/MARSSkemcheck.r b/R/MARSSkemcheck.r index 95be2d7..1c4a384 100644 --- a/R/MARSSkemcheck.r +++ b/R/MARSSkemcheck.r @@ -1,3 +1,18 @@ +#' Model Checking for MLE objects Passed to MARSSkem +#' +#' @description +#' This is a helper function in the [MARSS-package] that checks that the model can be handled by the [MARSSkem] algorithm. It also returns the structure of the model as a list of text strings. +#' +#' @param MLEobj An object of class [marssMLE]. +#' +#' @return +#' A list of the model elements A, B, Q, R, U, x0, Z, V0 specifying the structure of the model using text strings. +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [marssMODEL], [MARSSkem()] +#' @keywords internal MARSSkemcheck <- function(MLEobj) { # This checks that the model can be handled by the MARSSkem algorithm # Most of this is implementing the restrictions in Summary of Requirements for Degenerate Models in derivation diff --git a/R/MARSSkf.r b/R/MARSSkf.r index 88a45d8..0a86457 100644 --- a/R/MARSSkf.r +++ b/R/MARSSkf.r @@ -2,6 +2,93 @@ # MARSSkf function # Utility function to choose the Kalman filter and smoother ####################################################################################################### +#' Kalman Filtering and Smoothing +#' +#' Provides Kalman filter and smoother output for MARSS models with (or without) time-varying parameters. `MARSSkf()` is a small helper function to select which Kalman filter/smoother function to use based on the value in `MLEobj$fun.kf`. The choices are `MARSSkfas()` which uses the filtering and smoothing algorithms in the [KFAS](https://CRAN.R-project.org/package=KFAS) package based on algorithms in Koopman and Durbin (2001-2003), and `MARSSkfss()` which uses the algorithms in Shumway and Stoffer. 
The default function is `MARSSkfas()` which is faster and generally more stable (fewer matrix inversions), but there are some cases where `MARSSkfss()` might be more stable and it returns a variety of diagnostics that `MARSSkfas()` does not. +#' +#' @param MLEobj A [marssMLE()] object with the `par` element of estimated parameters, `marss` element with the model description (in marss form) and data, and `control` element for the fitting algorithm specifications. `control$debugkf` specifies that detailed error reporting will be returned (only used by `MARSSkf()`). `model$diffuse=TRUE` specifies that a diffuse prior be used (only used by `MARSSkfas()`). See [KFAS::KFS()] documentation. When the diffuse prior is set, `V0` should be non-zero since the diffuse prior variance is `V0*kappa`, where kappa goes to infinity. +#' @param smoother Used by `MARSSkfss()`. If set to FALSE, only the Kalman filter is run. The output `xtT`, `VtT`, `x0T`, `Vtt1T`, `V0T`, and `J0` will be NULL. +#' @param only.logLik Used by `MARSSkfas()`. If set, only the log-likelihood is returned using the [KFAS::KFAS()] package function [KFAS::logLik.SSModel()]. This is much faster if only the log-likelihood is needed. +#' @param return.lag.one Used by `MARSSkfas()`. If set to FALSE, the smoothed lag-one covariance values are not returned (output `Vtt1T` is set to NULL). This speeds up `MARSSkfas()` because to return the smoothed lag-one covariance a stacked MARSS model is used with twice the number of state vectors---thus the state matrices are larger and take more time to work with. +#' @param return.kfas.model Used by `MARSSkfas()`. If set to TRUE, it returns the MARSS model in [KFAS::KFAS()] model form (class [KFAS::SSModel()]). This is useful if you want to use other KFAS functions or write your own functions to work with [optim()] to do optimization. 
This can speed things up since there is a bit of code overhead in [MARSSoptim()] associated with the [marssMODEL()] model specification needed for the constrained EM algorithm (but not strictly needed for [optim()]; useful but not required.). +#' @param newdata A new matrix of data to use in place of the data used to fit the model (in the `model$data` and `marss$data` elements of a [marssMLE()] object). If the initial \eqn{x} was estimated (in `x0`) then this estimate will be used for `newdata` and this may not be appropriate. +#' +#' @details +#' For state-space models, the Kalman filter and smoother provide optimal (minimum mean square error) estimates of the hidden states. The Kalman filter is a forward recursive algorithm which computes estimates of the states \eqn{\mathbf{x}_t}{x(t)} conditioned on the data up to time \eqn{t} (`xtt`). The Kalman smoother is a backward recursive algorithm which starts at time \eqn{T} and works backwards to \eqn{t = 1} to provide estimates of the states conditioned on all data (`xtT`). The data may contain missing values (NAs). All parameters may be time varying. +#' +#' The initial state is either an estimated parameter or treated as a prior (with mean and variance). The initial state can be specified at \eqn{t=0} or \eqn{t=1}. The EM algorithm in the MARSS package ([MARSSkem()]) provides both Shumway and Stoffer's derivation that uses \eqn{t=0} and Ghahramani et al algorithm which uses \eqn{t=1}. The `MLEobj$model$tinitx` argument specifies whether the initial states (specified with `x0` and `V0` in the `model` list) is at \eqn{t=0} (`tinitx=0`) or \eqn{t=1} (`tinitx=1`). If `MLEobj$model$tinitx=0`, `x0` is defined as \eqn{\textrm{E}[\mathbf{X}_0|\mathbf{y}_0]}{E[X(0)|y(0)]} and `V0` is defined as \eqn{\textrm{E}[\mathbf{X}_0\mathbf{X}_0|\mathbf{y}_0]}{E[X(0)X(0)|y(0)]} which appear in the Kalman filter at \eqn{t=1} (first set of equations). 
If `MLEobj$model$tinitx=1`, `x0` is defined as \eqn{\textrm{E}[\mathbf{X}_1|\mathbf{y}_0]}{E[X(1)|y(0)]} and `V0` is defined as \eqn{\textrm{E}[\mathbf{X}_1\mathbf{X}_1|\mathbf{y}_0]}{E[X(1)X(1)|y(0)]} which appear in the Kalman filter at \eqn{t=1} (and the filter starts at t=1 at the 3rd and 4th equations in the Kalman filter recursion). Thus if `MLEobj$model$tinitx=1`, `x0=xtt1[,1]` and `V0=Vtt1[,,1]` in the Kalman filter output while if `MLEobj$model$tinitx=0`, the initial condition will not be in the filter output since time starts at 1 not 0 in the output. +#' +#' `MARSSkfss()` is a native R implementation based on the Kalman filter and smoother equations as shown in Shumway and Stoffer (sec 6.2, 2006). The equations have been altered to allow the initial state distribution to be specified at \eqn{t=0} or \eqn{t=1} (data starts at \eqn{t=1}) per Ghahramani and Hinton (1996). In addition, the filter and smoother equations have been altered to allow partially deterministic models (some or all elements of the \eqn{\mathbf{Q}}{Q} diagonal equal to 0), partially perfect observation models (some or all elements of the \eqn{\mathbf{R}}{R} diagonal equal to 0) and fixed (albeit unknown) initial states (some or all elements of the \eqn{\mathbf{V0}}{V0} diagonal equal to 0) (per Holmes 2012). The code includes numerous checks to alert the user if matrices are becoming ill-conditioned and the algorithm unstable. +#' +#' `MARSSkfas()` uses the (Fortran-based) Kalman filter and smoother function ([KFAS::KFS()]) in the [KFAS](https://cran.r-project.org/package=KFAS) package (Helske 2012) based on the algorithms of Koopman and Durbin (2000, 2001, 2003). The Koopman and Durbin algorithm is faster and more stable since it avoids matrix inverses. Exact diffuse priors are also allowed in the KFAS Kalman filter function. The standard output from the KFAS functions does not include the lag-one covariance smoother needed for the EM algorithm.
`MARSSkfas()` computes the smoothed lag-one covariance using the Kalman filter applied to a stacked MARSS model as described on page 321 in Shumway and Stoffer (2000). Also the KFAS model specification only has the initial state at \eqn{t=1} (as \eqn{\mathbf{X}_1}{X(1)} conditioned on \eqn{\mathbf{y}_0}{y(0)}, which is missing). When the initial state is specified at \eqn{t=0} (as \eqn{\mathbf{X}_0}{X(0)} conditioned on \eqn{\mathbf{y}_0}{y(0)}), `MARSSkfas()` computes the required \eqn{\textrm{E}[\mathbf{X}_1|\mathbf{y}_0]}{E[X(1)|y(0)]} and \eqn{\textrm{var}[\mathbf{X}_1|\mathbf{y}_0]}{var[X(1)|y(0)]} using the Kalman filter equations per Ghahramani and Hinton (1996). +#' +#' The likelihood returned for both functions is the exact likelihood when there are missing values rather than the approximate likelihood sometimes presented in texts for the missing values case. The functions return the same filter, smoother and log-likelihood values. The differences are that `MARSSkfas()` is faster (and more stable) but `MARSSkfss()` has many internal checks and error messages which can help debug numerical problems (but slow things down). Also `MARSSkfss()` returns some output specific to the traditional filter algorithm (`J` and `Kt`). +#' +#' @return +#' A list with the following components. \eqn{m} is the number of state processes and \eqn{n} is the number of observation time series. "V" elements are called "P" in Shumway and Stoffer (2006, eqn 6.17 with s=T). The output is referenced against equations in Shumway and Stoffer (2006) denoted S&S; the Kalman filter and smoother implemented in MARSS are for a more general MARSS model than that shown in S&S but the output has the same meaning. In the expectations below, the parameters are left off; \eqn{\textrm{E}[\mathbf{X} | \mathbf{y}_1^t]}{E[X | y(1:t)]} is really \eqn{\textrm{E}[\mathbf{X} | \Theta, \mathbf{Y}_1^t=\mathbf{y}_1^t]}{E[X | Theta, Y(1:t)=y(1:t)]} where \eqn{\Theta}{Theta} is the parameter list.
\eqn{\mathbf{y}_1^t}{y(1:t)} denotes the data from \eqn{t=1} to \eqn{t=t}. +#' +#' The notation for the conditional expectations is \eqn{\mathbf{x}_t^t}{xtt(t)} = \eqn{\textrm{E}[\mathbf{X} | \mathbf{y}_1^t]}{E[X | y(1:t)]}, \eqn{\mathbf{x}_t^{t-1}}{xtt1(t)} = \eqn{\textrm{E}[\mathbf{X} | \mathbf{y}_1^{t-1}]}{E[X | y(1:t-1)]} and \eqn{\mathbf{x}_t^T}{xtT(t)} = \eqn{\textrm{E}[\mathbf{X} | \mathbf{y}_1^T]}{E[X | y(1:T)]}. The conditional variances and covariances use similar notation. Note that in Holmes (2012), the EM Derivation, \eqn{\mathbf{x}_t^T}{xtT(t)} and \eqn{\mathbf{V}_t^T}{VtT(t)} are given special symbols because they appear repeatedly: \eqn{\tilde{\mathbf{x}}_t}{tildex(t)} and \eqn{\tilde{\mathbf{V}}_t}{tildeV(t)} but here the more general notation is used. +#' +#' * xtT: \eqn{\mathbf{x}_t^T}{xtT(t)} State first moment conditioned on \eqn{\mathbf{y}_1^T}{y(1:T)}: \eqn{\textrm{E}[\mathbf{X}_t|\mathbf{y}_1^T]}{E[X(t) | y(1:T)]} (m x T matrix). Kalman smoother output. +#' * VtT: \eqn{\mathbf{V}_t^T}{VtT(t)} State variance matrix conditioned on \eqn{\mathbf{y}_1^T}{y(1:T)}: \eqn{\textrm{E}[(\mathbf{X}_t-\mathbf{x}_t^T)(\mathbf{X}_t-\mathbf{x}_t^T)^\top|\mathbf{y}_1^T]}{E[(X(t)-xtT(t))(X(t)-xtT(t))' | y(1:T)]} (m x m x T array). Kalman smoother output. Denoted \eqn{P_t^T}{P_t^T} in S&S (S&S eqn 6.18 with \eqn{s=T}, \eqn{t1=t2=t}). The state second moment \eqn{\textrm{E}[\mathbf{X}_t\mathbf{X}_t^\top|\mathbf{y}_1^T]}{E[X(t)X(t)'| y(1:T)]} is equal to \eqn{\mathbf{V}_t^T + \mathbf{x}_t^T(\mathbf{x}_t^T)^\top}{VtT(t)+xtT(t)xtT(t)'}. +#' * Vtt1T: \eqn{\mathbf{V}_{t,t-1}^T}{Vtt1T(t)} State lag-one cross-covariance matrix \eqn{\textrm{E}[(\mathbf{X}_t-\mathbf{x}_t^T)(\mathbf{X}_{t-1}-\mathbf{x}_{t-1}^T)^\top|\mathbf{y}_1^T]}{E[(X(t)-xtT(t))(X(t-1)-xtT(t-1))' | y(1:T)]} (m x m x T). Kalman smoother output. \eqn{P_{t,t-1}^T} in S&S (S&S eqn 6.18 with \eqn{s=T}, \eqn{t1=t}, \eqn{t2=t-1}).
The state lag-one second moment \eqn{\textrm{E}[\mathbf{X}_t\mathbf{X}_{t-1}^\top|\mathbf{y}_1^T]}{E[X(t)X(t-1)'| y(1:T)]} is equal to \eqn{\mathbf{V}_{t, t-1}^T + \mathbf{x}_t^T(\mathbf{x}_{t-1}^T)^\top}{Vtt1T(t)+xtT(t)xtT(t-1)'}. +#' * x0T: Initial smoothed state estimate \eqn{\textrm{E}[\mathbf{X}_{t0}|\mathbf{y}_1^T]}{E[X(t0) | y(1:T)]} (m x 1). If `model$tinitx=0`, \eqn{t0=0}; if `model$tinitx=1`, \eqn{t0=1}. Kalman smoother output. +#' * x01T: Smoothed state estimate \eqn{\textrm{E}[\mathbf{X}_1|\mathbf{y}_1^T]}{E[X(1) | y(1:T)]} (m x 1). +#' * x00T: Smoothed state estimate \eqn{\textrm{E}[\mathbf{X}_0 |\mathbf{y}_1^T]}{E[X(0) | y(1:T)]} (m x 1). If `model$tinitx=1`, this will be NA. +#' * V0T: Initial smoothed state covariance matrix \eqn{\textrm{E}[\mathbf{X}_{t0}\mathbf{X}_0^\top | \mathbf{y}_1^T]}{E[X(t0)X(0)' | y(1:T)]} (m x m). If `model$tinitx=0`, \eqn{t0=0} and `V0T=V00T`; if `model$tinitx=1`, \eqn{t0=1} and `V0T=V10T`. In the case of `tinitx=0`, this is \eqn{P_0^T} in S&S. +#' * V10T: Smoothed state covariance matrix \eqn{\textrm{E}[\mathbf{X}_1\mathbf{X}_0^\top | \mathbf{y}_1^T]}{E[X(1)X(0)' | y(1:T)]} (m x m). +#' * V00T: Smoothed state covariance matrix \eqn{\textrm{E}[\mathbf{X}_0\mathbf{X}_0^\top | \mathbf{y}_1^T]}{E[X(0)X(0)' | y(1:T)]} (m x m). If `model$tinitx=1`, this will be NA. +#' * J: (m x m x T) Kalman smoother output. Only for `MARSSkfss()`. (ref S&S eqn 6.49) +#' * J0: J at the initial time (t=0 or t=1) (m x m x T). Kalman smoother output. Only for `MARSSkfss()`. +#' * xtt: State first moment conditioned on \eqn{\mathbf{y}_1^t}{y(1:t)}: \eqn{\textrm{E}[\mathbf{X}_t | \mathbf{y}_1^t]}{E[X(t) | y(1:t)]} (m x T). Kalman filter output. (S&S eqn 6.17 with \eqn{s=t}) +#' * xtt1: State first moment conditioned on \eqn{\mathbf{y}_1^{t-1}}{y(1:t-1)}: \eqn{\textrm{E}[\mathbf{X}_t | \mathbf{y}_1^{t-1}]}{E[X(t) | y(1:t-1)]} (m x T). Kalman filter output.
(S&S eqn 6.17 with \eqn{s=t-1}) +#' * Vtt: State variance conditioned on \eqn{\mathbf{y}_1^t}{y(1:t)}: \eqn{\textrm{E}[(\mathbf{X}_t-\mathbf{x}_t^t)(\mathbf{X}_t-\mathbf{x}_t^t)^\top|\mathbf{y}_1^t]}{E[(X(t)-xtt(t))(X(t)-xtt(t))' | y(1:t)]} (m x m x T array). Kalman filter output. \eqn{P_t^t} in S&S (S&S eqn 6.18 with s=t, t1=t2=t). The state second moment \eqn{\textrm{E}[\mathbf{X}_t\mathbf{X}_t^\top|\mathbf{y}_1^t]}{E[X(t)X(t)'| y(1:t)]} is equal to \eqn{\mathbf{V}_t^t + \mathbf{x}_t^t(\mathbf{x}_t^t)^\top}{Vtt(t)+xtt(t)xtt(t)'}. +#' * Vtt1: State variance conditioned on \eqn{\mathbf{y}_1^{t-1}}{y(1:t-1)}: \eqn{\textrm{E}[(\mathbf{X}_t-\mathbf{x}_t^{t-1})(\mathbf{X}_t-\mathbf{x}_t^{t-1})^\top|\mathbf{y}_1^{t-1}]}{E[(X(t)-xtt1(t))(X(t)-xtt1(t))' | y(1:t-1)]} (m x m x T array). Kalman filter output. The state second moment \eqn{\textrm{E}[\mathbf{X}_t\mathbf{X}_t^\top|\mathbf{y}_1^{t-1}]}{E[X(t)X(t)'| y(1:t-1)]} is equal to \eqn{\mathbf{V}_t^{t-1} + \mathbf{x}_t^{t-1}(\mathbf{x}_t^{t-1})^\top}{Vtt1(t)+xtt1(t)xtt1(t)'}. +#' * Kt: Kalman gain (m x m x T). Kalman filter output (ref S&S eqn 6.23). Only for `MARSSkfss()`. +#' * Innov: Innovations \eqn{\mathbf{y}_t-\textrm{E}[\mathbf{Y}_t|\mathbf{y}_1^{t-1}]}{y(t) - E[Y(t) | y(1:t-1)]} (n x T). Kalman filter output. Only returned with `MARSSkfss()`. (ref S&S page 339) +#' * Sigma: Innovations covariance matrix. Kalman filter output. Only returned with `MARSSkfss()`. (ref S&S eqn 6.61) +#' * logLik: Log-likelihood logL(y(1:T) | Theta) (ref S&S eqn 6.62) +#' * kfas.model: The model in [KFAS::KFAS()] model form (class [KFAS::SSModel()]). Only for `MARSSkfas()`. +#' * errors: Any error messages. +#' +#' @references +#' A. C. Harvey (1989). Chapter 5, Forecasting, structural time series models and the Kalman filter. Cambridge University Press. +#' +#' R. H. Shumway and D. S. Stoffer (2006). Time series analysis and its applications: with R examples. Second Edition. Springer-Verlag, New York. +#' +#' Ghahramani, Z.
and Hinton, G.E. (1996) Parameter estimation for linear dynamical systems. University of Toronto Technical Report CRG-TR-96-2. +#' +#' Holmes, E. E. (2012). Derivation of the EM algorithm for constrained and unconstrained multivariate autoregressive state-space (MARSS) models. Technical Report. arXiv:1302.3919 [stat.ME] `RShowDoc("EMDerivation",package="MARSS")` to open a copy. +#' +#' Jouni Helske (2012). KFAS: Kalman filter and smoother for exponential family state space models. +#' +#' Koopman, S.J. and Durbin J. (2000). Fast filtering and smoothing for non-stationary time series models, Journal of American Statistical Association, 92, 1630-38. +#' +#' Koopman, S.J. and Durbin J. (2001). Time series analysis by state space methods. Oxford: Oxford University Press. +#' +#' Koopman, S.J. and Durbin J. (2003). Filtering and smoothing of state vector for diffuse state space models, Journal of Time Series Analysis, Vol. 24, No. 1. +#' +#' The MARSS User Guide: Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. NOAA Fisheries, Northwest Fisheries Science Center, 2725 Montlake Blvd E., Seattle, WA 98112 Type `RShowDoc("UserGuide",package="MARSS")` to open a copy. +#' +#' @author Eli Holmes, NOAA, Seattle, USA. 
+#' +#' @seealso [MARSS()], [marssMODEL()], [MARSSkem()], [KFAS::KFAS()] +#' +#' @aliases MARSSkfas MARSSkfss +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[2:nrow(dat), ] +#' # you can use MARSS to construct a marssMLE object +#' # MARSS calls MARSSinits to construct default initial values +#' # with fit = FALSE, the $par element of the marssMLE object will be NULL +#' fit <- MARSS(dat, fit = FALSE) +#' # MARSSkf needs a marssMLE object with the par element set +#' fit$par <- fit$start +#' # Compute the kf output at the params used for the inits +#' kfList <- MARSSkf(fit) +#' +#' @export MARSSkf <- function(MLEobj, only.logLik = FALSE, return.lag.one = TRUE, return.kfas.model = FALSE, newdata = NULL, smoother = TRUE) { if (is.null(MLEobj$par)) { stop("Stopped in MARSSkf(): par element of marssMLE object is required.\n") diff --git a/R/MARSSoptim.r b/R/MARSSoptim.r index 040025a..d7b4b8a 100644 --- a/R/MARSSoptim.r +++ b/R/MARSSoptim.r @@ -4,6 +4,138 @@ # Q and R are not allowed to be time-varying # Likelihood computation is via Kalman filter ####################################################################################################### +#' @title Parameter estimation for MARSS models using optim +#' +#' @description +#' Parameter estimation for MARSS models using R's [optim()] function. This +#' allows access to R's quasi-Newton algorithms available in that function. The +#' `MARSSoptim()` function is called when [MARSS()] is called with +#' `method="BFGS"`. This is an internal function in the [MARSS-package]. +#' +#' @param MLEobj An object of class [marssMLE]. +#' +#' @details +#' Objects of class [marssMLE] may be built from scratch but are easier to +#' construct using [MARSS()] called with `MARSS(..., fit=FALSE, method="BFGS")`. +#' +#' Options for [optim()] are passed in using `MLEobj$control`. See [optim()] +#' for a list of that function's control options. 
If `lower` and `upper` for +#' [optim()] need to be passed in, they should be passed in as part of +#' `control` as `control$lower` and `control$upper`. Additional `control` +#' arguments affect printing and initial conditions. +#' +#' * `MLEobj$control$kf.x0`: The initial condition is at \eqn{t=0} if +#' `kf.x0="x00"`. The initial condition is at \eqn{t=1} if `kf.x0="x10"`. +#' * `MLEobj$marss$diffuse`: If `diffuse=TRUE`, a diffuse initial condition is +#' used. `MLEobj$par$V0` is then the scaling function for the diffuse part of +#' the prior. Thus the prior is `V0*kappa` where `kappa-->Inf`. Note that +#' setting a diffuse prior does not change the correlation structure within +#' the prior. If `diffuse=FALSE`, a non-diffuse prior is used and +#' `MLEobj$par$V0` is the non-diffuse prior variance on the initial states. +#' The prior is `V0`. +#' * `MLEobj$control$silent`: Suppresses printing of progress bars, error +#' messages, warnings and convergence information. +#' +#' @return +#' The [marssMLE] object which was passed in, with additional components: +#' +#' * `method`: String `"BFGS"`. +#' * `kf`: Kalman filter output. +#' * `iter.record`: If `MLEobj$control$trace = TRUE`, then this is the +#' `$message` value from [optim]. +#' * `numIter`: Number of iterations needed for convergence. +#' * `convergence`: Did estimation converge successfully? +#' * `convergence=0`: Converged in less than `MLEobj$control$maxit` +#' iterations and no evidence of degenerate solution. +#' * `convergence=3`: No convergence diagnostics were computed because all +#' parameters were fixed thus no fitting required. +#' * `convergence=-1`: No convergence diagnostics were computed because the +#' MLE object was not fit (called with `fit=FALSE`). This isn't a +#' convergence error just information. There is no `par` element so no +#' functions can be run with the object. 
+#' * `convergence=1`: Maximum number of iterations `MLEobj$control$maxit` +#' was reached before `MLEobj$control$abstol` condition was satisfied. +#' * `convergence=10`: Some of the variance elements appear to be degenerate. +#' * `convergence=52`: The algorithm was abandoned due to errors from the +#' `"L-BFGS-B"` method. +#' * `convergence=53`: The algorithm was abandoned due to numerical errors in +#' the likelihood calculation from [MARSSkf]. If this happens with +#' `"BFGS"`, it can sometimes be helped with a better initial condition. +#' Try using the EM algorithm first (`method="kem"`), and then using the +#' parameter estimates from that as initial conditions for `method="BFGS"`. +#' * `convergence=54`: The algorithm successfully fit the model but the +#' Kalman filter/smoother could not be run on the model. Consult +#' `MARSSinfo('optimerror54')` for insight. +#' * `logLik`: Log-likelihood. +#' * `states`: State estimates from the Kalman smoother. +#' * `states.se`: Confidence intervals based on state standard errors, see +#' caption of Fig 6.3 (p. 337) in Shumway & Stoffer (2006). +#' * `errors`: Any error messages. +#' +#' @section Discussion: +#' The function only returns parameter estimates. To compute CIs, use +#' [MARSSparamCIs] but if you use parametric or non-parametric bootstrapping +#' with this function, it will use the EM algorithm to compute the bootstrap +#' parameter estimates! The quasi-Newton estimates are too fragile for the +#' bootstrap routine since one often needs to search to find a set of initial +#' conditions that work (i.e. don't lead to numerical errors). +#' +#' Estimates from `MARSSoptim` (which come from [optim]) should be checked +#' against estimates from the EM algorithm. If the quasi-Newton algorithm +#' works, it will tend to find parameters with higher likelihood faster than +#' the EM algorithm. 
However, the MARSS likelihood surface can be multimodal +#' with sharp peaks at degenerate solutions where a \eqn{\mathbf{Q}}{Q} or +#' \eqn{\mathbf{R}}{R} diagonal element equals 0. The quasi-Newton algorithm +#' sometimes gets stuck on these peaks even when they are not the maximum. +#' Neither an initial conditions search nor starting near the known maximum (or +#' from the parameter estimates after the EM algorithm) will necessarily solve +#' this problem. Thus it is wise to check against EM estimates to ensure that +#' the BFGS estimates are close to the MLE estimates (and vice versa, it's +#' wise to rerun with `method="BFGS"` after using `method="kem"`). Conversely, +#' if there is a strong flat ridge in your likelihood, the EM algorithm can +#' report early convergence while the BFGS may continue much further along the +#' ridge and find very different parameter values. Of course a likelihood +#' surface with strong flat ridges makes the MLEs less informative... +#' +#' Note this is mainly a problem if the time series are short or very gappy. If +#' the time series are long, then the likelihood surface should be nice with a +#' single interior peak. In this case, the quasi-Newton algorithm works well +#' but it can still be sensitive (and slow) if not started with a good initial +#' condition. Thus starting it with the estimates from the EM algorithm is +#' often desirable. +#' +#' One should be aware that the prior set on the variance of the initial states +#' at t=0 or t=1 can have catastrophic effects on one's estimates if the +#' presumed prior covariance structure conflicts with the structure implied by +#' the MARSS model. For example, if you use a diagonal variance-covariance +#' matrix for the prior but the model implies a variance-covariance matrix with +#' non-zero covariances, your MLE estimates can be strongly influenced by the +#' prior variance-covariance matrix.
Setting a diffuse prior does not help +#' because the diffuse prior still has the correlation structure specified by +#' V0. One way to detect priors effects is to compare the BFGS estimates to the +#' EM estimates. Persistent differences typically signify a problem with the +#' correlation structure in the prior conflicting with the implied correlation +#' structure in the MARSS model. +#' +#' @author Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSS()], [MARSSkem()], [marssMLE()], [optim()] +#' +#' @examples +#' dat <- t(harborSealWA) +#' dat <- dat[2:4, ] # remove the year row +#' +#' # fit a model with EM and then use that fit as the start for BFGS +#' # fit a model with 1 hidden state where obs errors are iid +#' # R="diagonal and equal" is the default so not specified +#' # Q is fixed +#' kemfit <- MARSS(dat, model = list(Z = matrix(1, 3, 1), Q = matrix(.01))) +#' bfgsfit <- MARSS(dat, +#' model = list(Z = matrix(1, 3, 1), Q = matrix(.01)), +#' inits = coef(kemfit, form = "marss"), method = "BFGS" +#' ) +#' +#' @export MARSSoptim <- function(MLEobj) { # This function does not check if user specified a legal MLE object. ## diff --git a/R/MARSSparamCIs.R b/R/MARSSparamCIs.R index 734a6b2..ac08d9c 100644 --- a/R/MARSSparamCIs.R +++ b/R/MARSSparamCIs.R @@ -3,6 +3,73 @@ # This returns CIs for ML parameter estimates # If method='hessian', uses either Harvey1989 (analytical) or fdHess (numerical) or optim (numerical) #################################################################################### +#' @title Standard Errors, Confidence Intervals and Bias for MARSS Parameters +#' +#' @description +#' Computes standard errors, confidence intervals and bias for the +#' maximum-likelihood estimates of MARSS model parameters. If you want +#' confidence intervals on the estimated hidden states, see +#' [print.marssMLE()] and look for `states.cis`. +#' +#' @param MLEobj An object of class [marssMLE]. 
Must have a `$par` element +#' containing the MLE parameter estimates. +#' @param method Method for calculating the standard errors: `"hessian"`, +#' `"parametric"`, and `"innovations"` implemented currently. +#' @param alpha alpha level for the 1-alpha confidence intervals. +#' @param nboot Number of bootstraps to use for `"parametric"` and +#' `"innovations"` methods. +#' @param hessian.fun The function to use for computing the Hessian. Options +#' are `"Harvey1989"` (default analytical) or two numerical options: +#' `"fdHess"` and `"optim"`. See [MARSShessian]. +#' @param silent If false, a progress bar is shown for `"parametric"` and +#' `"innovations"` methods. +#' +#' @details +#' Approximate confidence intervals (CIs) on the model parameters may be +#' calculated from the observed Fisher Information matrix ("Hessian CIs", see +#' [MARSSFisherI()]) or parametric or non-parametric (innovations) bootstrapping +#' using `nboot` bootstraps. The Hessian CIs are based on the asymptotic +#' normality of MLE parameters under a large-sample approximation. The Hessian +#' computation for variance-covariance matrices is a symmetric approximation +#' and the lower CIs for variances might be negative. Bootstrap estimates of +#' parameter bias are reported if method `"parametric"` or `"innovations"` is +#' specified. +#' +#' Note, these are added to the `par` elements of a [marssMLE] object but are +#' in `"marss"` form not `"marxss"` form. Thus the `MLEobj$par.upCI` and +#' related elements that are added to the [marssMLE] object may not look +#' familiar to the user. Instead the user should extract these elements using +#' `print(MLEobj)` and passing in the argument `what` set to `"par.se"`, +#' `"par.bias"`, `"par.lowCIs"`, or `"par.upCIs"`. See +#' [print.marssMLE][print](). Or use [tidy.marssMLE][tidy](). 
+#' +#' @return +#' `MARSSparamCIs` returns the [marssMLE] object passed in, with additional +#' components `par.se`, `par.upCI`, `par.lowCI`, `par.CI.alpha`, +#' `par.CI.method`, `par.CI.nboot` and `par.bias` (if method is `"parametric"` +#' or `"innovations"`). +#' +#' @references +#' Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of +#' multivariate time-series using the MARSS package. NOAA Fisheries, Northwest +#' Fisheries Science Center, 2725 Montlake Blvd E., Seattle, WA 98112. Type +#' `RShowDoc("UserGuide", package = "MARSS")` to open a copy. +#' +#' @author Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSSboot()], [MARSSinnovationsboot()], [MARSShessian()] +#' +#' @examples +#' dat <- t(harborSealWA) +#' dat <- dat[2:4, ] +#' kem <- MARSS(dat, model = list( +#' Z = matrix(1, 3, 1), +#' R = "diagonal and unequal" +#' )) +#' kem.with.CIs.from.hessian <- MARSSparamCIs(kem) +#' kem.with.CIs.from.hessian +#' +#' @export MARSSparamCIs <- function(MLEobj, method = "hessian", alpha = 0.05, nboot = 1000, silent = TRUE, hessian.fun = "Harvey1989") { # this function expects a marssMLE object # it will add standard errors, biases, low/up CIs to MLEobj diff --git a/R/MARSSresiduals.R b/R/MARSSresiduals.R index 8d67f4e..7d6a6d4 100644 --- a/R/MARSSresiduals.R +++ b/R/MARSSresiduals.R @@ -1,3 +1,95 @@ +#' MARSS Residuals +#' +#' The normal residuals function is [residuals()][residuals.marssMLE()]. `MARSSresiduals()` returns residuals as a list of matrices while [residuals()][residuals.marssMLE()] returns the same information in a data frame. This function calculates the residuals, residuals variance, and standardized residuals for the one-step-ahead (conditioned on data up to \eqn{t-1}), the smoothed (conditioned on all the data), and contemporaneous (conditioned on data up to \eqn{t}) residuals. +#' +#' @param object An object of class `marssMLE`. +#' @param ... Additional arguments to be passed to the residuals functions. 
For `type="tT"`, `Harvey=TRUE` can be passed in to use the Harvey et al (1998) algorithm. +#' @param type `"tT"` for smoothed residuals conditioned on all the data \eqn{t=1} to \eqn{T}, aka smoothation residuals. `"tt1"` for one-step-ahead residuals, aka innovations residuals. `"tt"` for contemporaneous residuals. +#' @param normalize TRUE/FALSE See details. +#' @param silent If TRUE, do not print inversion warnings. +#' @param fun.kf Kalman filter function to use. Can be ignored. +#' +#' @return A list of the following components +#' * `model.residuals`: The model residuals (data minus model predicted values) as an n x T matrix. +#' * `state.residuals`: The state residuals. This is the state residual for the transition from \eqn{t=t} to \eqn{t+1} thus the last time step will be NA (since \eqn{T+1} is past the data). State residuals do not exist for the `type="tt"` case (since this would require the expected value of \eqn{\mathbf{X}_t}{X(t)} conditioned on data to \eqn{t+1}). +#' * `residuals`: The residuals as a (n+m) x T matrix with `model.residuals` on top and `state.residuals` below. +#' * `var.residuals`: The variance of the model residuals and state residuals as a (n+m) x (n+m) x T matrix with the model residuals variance in rows/columns 1 to n and state residuals variances in rows/columns n+1 to n+m. The last time step will be all NA since the state residual is for \eqn{t=t} to \eqn{t+1}. +#' * `std.residuals`: The Cholesky standardized residuals as a (n+m) x T matrix. This is `residuals` multiplied by the inverse of the lower triangle of the Cholesky decomposition of `var.residuals`. +#' * `mar.residuals`: The marginal standardized residuals as a (n+m) x T matrix. This is `residuals` multiplied by the inverse of the diagonal matrix formed by the square-root of the diagonal of `var.residuals`. +#' * `bchol.residuals`: The Block Cholesky standardized residuals as a (n+m) x T matrix.
This is `model.residuals` multiplied by the inverse of the lower triangle of the Cholesky decomposition of `var.residuals[1:n,1:n,]` and `state.residuals` multiplied by the inverse of the lower triangle of the Cholesky decomposition of `var.residuals[(n+1):(n+m),(n+1):(n+m),]`. +#' * `E.obs.residuals`: The expected value of the model residuals conditioned on the observed data. Returned as a n x T matrix. For observed data, this will be the observed model residuals. For unobserved data, this will be 0 if \eqn{\mathbf{R}}{R} is diagonal but non-zero if \eqn{\mathbf{R}}{R} is non-diagonal. See [MARSSresiduals.tT()]. +#' * `var.obs.residuals`: The variance of the model residuals conditioned on the observed data. Returned as a n x n x T matrix. For observed data, this will be 0. See [MARSSresiduals.tT()]. +#' * `msg`: Any warning messages. This will be printed unless Object$control$trace = -1 (suppress all error messages). +#' +#' @details +#' +#' For smoothed residuals, see [MARSSresiduals.tT()]. +#' +#' For one-step-ahead residuals, see [MARSSresiduals.tt1()]. +#' +#' For contemporaneous residuals, see [MARSSresiduals.tt()]. +#' +#' **Standardized residuals** +#' +#' Standardized residuals have been adjusted by the variance of the residuals at time \eqn{t} such that the variance of the residuals at time \eqn{t} equals 1. Given the normality assumption, this means that one typically sees +/- 2 confidence interval lines on standardized residuals plots. +#' +#' `std.residuals` are Cholesky standardized residuals. These are the residuals multiplied by the inverse of the lower triangle of the Cholesky decomposition of the variance matrix of the residuals: +#' \deqn{ \hat{\Sigma}_t^{-1/2} \hat{\mathbf{v}}_t.}{ hatSigma_t^{-1/2} hatv_t.} +#' These residuals are uncorrelated with each other, although they are not necessarily temporally uncorrelated (innovations residuals are temporally uncorrelated). 
+#' +#' The interpretation of the Cholesky standardized residuals is not straight-forward when the \eqn{\mathbf{Q}}{Q} and \eqn{\mathbf{R}}{R} variance-covariance matrices are non-diagonal. The residuals which were generated by non-diagonal variance-covariance matrices are transformed into orthogonal residuals in \eqn{\textrm{MVN}(0,\mathbf{I})}{MVN(0,I)} space. For example, if v is 2x2 correlated errors with variance-covariance matrix \eqn{\mathbf{R}}{R}, the transformed residuals (from this function) for the i-th row of v is a combination of the row 1 effect and the row 1 effect plus the row 2 effect. So in this case, row 2 of the transformed residuals would not be regarded as solely the row 2 residual but rather how different row 2 is from row 1, relative to expected. If the errors are highly correlated, then the Cholesky standardized residuals can look rather non-intuitive. +#' +#' `mar.residuals` are the marginal standardized residuals. These are the residuals multiplied by the inverse of the diagonal matrix formed from the square-root of the diagonal of the variance matrix of the residuals: +#' \deqn{ \textrm{dg}(\hat{\Sigma}_t)^{-1/2} \hat{\mathbf{v}}_t,}{ dg(hatSigma_t)^{-1/2} hatv_t,} where \eqn{dg(A)} is the square matrix formed from the diagonal of \eqn{A}, aka `diag(diag(A))`. These residuals will be correlated if the variance matrix is non-diagonal. +#' +#' The Block Cholesky standardized residuals are like the Cholesky standardized residuals except that the full variance-covariance matrix is not used, only the variance-covariance matrix for the model or state residuals (respectively) is used for standardization. For the model residuals, the Block Cholesky standardized residuals will be the same as the Cholesky standardized residuals because the upper triangle of the lower triangle of the Cholesky decomposition (which is what we standardize by) is all zero.
For `type="tt1"` and `type="tt"`, the Block Cholesky standardized state residuals will be the same as the Cholesky standardized state residuals because in the former, the model and state residuals are uncorrelated and in the latter, the state residuals do not exist. For `type="tT"`, the model and state residuals are correlated and the Block Cholesky standardized residuals will be different than the Cholesky standardized residuals. +#' +#' **Normalized residuals** +#' +#' If `normalize=FALSE`, the unconditional variance of \eqn{\mathbf{V}_t}{V(t)} and \eqn{\mathbf{W}_t}{W(t)} are \eqn{\mathbf{R}}{R} and \eqn{\mathbf{Q}}{Q} and the model is assumed to be written as +#' \deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{v}_t}{ y(t) = Z x(t) + a + v(t)} +#' \deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{w}_t}{ x(t) = B x(t-1) + u + w(t)} +#' If `normalize=TRUE`, the model is assumed to be written as +#' \deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{H}\mathbf{v}_t}{ y(t) = Z x(t) + a + Hv(t)} +#' \deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{G}\mathbf{w}_t}{ x(t) = B x(t-1) + u + Gw(t)} +#' with the variance of \eqn{\mathbf{V}_t}{V(t)} and \eqn{\mathbf{W}_t}{W(t)} equal to \eqn{\mathbf{I}}{I} (identity). +#' +#' **Missing or left-out data** +#' +#' See the discussion of residuals for missing and left-out data in [MARSSresiduals.tT()]. +#' +#' @author Eli Holmes, NOAA, Seattle, USA. 
+#' +#' @seealso [residuals.marssMLE()], [MARSSresiduals.tT()], [MARSSresiduals.tt1()], [plot.marssMLE()] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[c(2,11),] +#' fit <- MARSS(dat) +#' +#' #state smoothed residuals +#' state.resids1 <- MARSSresiduals(fit, type="tT")$state.residuals +#' #this is the same as +#' states <- fit$states +#' Q <- coef(fit, type="matrix")$Q +#' state.resids2 <- states[,2:30]-states[,1:29]-matrix(coef(fit,type="matrix")$U,2,29) +#' #compare the two +#' cbind(t(state.resids1[,-30]), t(state.resids2)) +#' +#' #normalize to variance of 1 +#' state.resids1 <- MARSSresiduals(fit, type="tT", normalize=TRUE)$state.residuals +#' state.resids2 <- (solve(t(chol(Q))) %*% state.resids2) +#' cbind(t(state.resids1[,-30]), t(state.resids2)) +#' +#' #one-step-ahead standardized residuals +#' MARSSresiduals(fit, type="tt1")$std.residuals +#' +#' @references +#' Holmes, E. E. 2014. Computation of standardized residuals for (MARSS) models. Technical Report. arXiv:1411.0045. +#' +#' See also the discussion and references in [MARSSresiduals.tT()], [MARSSresiduals.tt1()] and [MARSSresiduals.tt()]. +#' +#' @export MARSSresiduals <- function(object, ..., type = c("tT", "tt1", "tt"), normalize = FALSE, silent = FALSE, fun.kf = c("MARSSkfas", "MARSSkfss")) { type <- match.arg(type) fun.kf <- match.arg(fun.kf) diff --git a/R/MARSSresiduals.tT.R b/R/MARSSresiduals.tT.R index 4a6ede1..f661f53 100644 --- a/R/MARSSresiduals.tT.R +++ b/R/MARSSresiduals.tT.R @@ -1,3 +1,387 @@ +#' MARSS Smoothed Residuals +#' +#' Calculates the standardized (or auxiliary) smoothed residuals sensu Harvey, +#' Koopman and Penzer (1998). The expected values and variance for missing (or +#' left-out) data are also returned (Holmes 2014). Not exported. Access this +#' function with `MARSSresiduals(object, type="tT")`. 
At time \eqn{t} (in the +#' returned matrices), the model residuals are for time \eqn{t}, while the state +#' residuals are for the transition from \eqn{t} to \eqn{t+1} following the +#' convention in Harvey, Koopman and Penzer (1998). +#' +#' @param object An object of class [marssMLE()]. +#' @param Harvey TRUE/FALSE. Use the Harvey et al. (1998) algorithm or use the +#' Holmes (2014) algorithm. The values are the same except for missing values. +#' @param normalize TRUE/FALSE See details. +#' @param silent If TRUE, don't print inversion warnings. +#' @param fun.kf Kalman filter function to use. Can be ignored. +#' +#' @return A list with the following components +#' * `model.residuals`: The observed smoothed model residuals: data minus the +#' model predictions conditioned on all observed data. This is different than +#' the Kalman filter innovations which use only the data up to time \eqn{t-1} +#' for the predictions. See details. +#' * `state.residuals`: The smoothed state residuals +#' \eqn{\mathbf{x}_{t+1}^T - \mathbf{B} \mathbf{x}_{t}^T - \mathbf{u}}{xtT(t+1) - B xtT(t) - u}. +#' The last time step will be NA because the last step would be for T to T+1 +#' (past the end of the data). +#' * `residuals`: The residuals conditioned on the observed data. Returned as a +#' (n+m) x T matrix with `model.residuals` in rows 1 to n and +#' `state.residuals` in rows n+1 to n+m. NAs will appear in rows 1 to n in +#' the places where data are missing. +#' * `var.residuals`: The joint variance of the model and state residuals +#' conditioned on observed data. Returned as a (n+m) x (n+m) x T matrix. For +#' Harvey=FALSE, this is Holmes (2014) equation 57. For Harvey=TRUE, this is +#' the residual variance in eqn. 24, page 113, in Harvey et al. (1998). They +#' are identical except for missing values, for those Harvey=TRUE returns 0s.
+#' For the state residual variance, the last time step will be all NA because +#' the last step would be for T to T+1 (past the end of the data). +#' * `std.residuals`: The Cholesky standardized residuals as a (n+m) x T matrix. +#' This is `residuals` multiplied by the inverse of the lower triangle of the +#' Cholesky decomposition of `var.residuals`. The model standardized residuals +#' associated with the missing data are replaced with NA. +#' * `mar.residuals`: The marginal standardized residuals as a (n+m) x T matrix. +#' This is `residuals` multiplied by the inverse of the diagonal matrix formed +#' by the square-root of the diagonal of `var.residuals`. The model marginal +#' residuals associated with the missing data are replaced with NA. +#' * `bchol.residuals`: The Block Cholesky standardized residuals as a (n+m) x T +#' matrix. This is `model.residuals` multiplied by the inverse of the lower +#' triangle of the Cholesky decomposition of `var.residuals[1:n,1:n,]` and +#' `state.residuals` multiplied by the inverse of the lower triangle of the +#' Cholesky decomposition of +#' `var.residuals[(n+1):(n+m),(n+1):(n+m),]`. +#' * `E.obs.residuals`: The expected value of the model residuals conditioned on +#' the observed data. Returned as a n x T matrix. For observed data, this will +#' be the observed residuals (values in `model.residuals`). For unobserved +#' data, this will be 0 if \eqn{\mathbf{R}}{R} is diagonal but non-zero if +#' \eqn{\mathbf{R}}{R} is non-diagonal. See details. +#' * `var.obs.residuals`: The variance of the model residuals conditioned on the +#' observed data. Returned as a n x n x T matrix. For observed data, this will +#' be 0. See details. +#' * `msg`: Any warning messages. This will be printed unless +#' `Object$control$trace = -1` (suppress all error messages). +#' +#' @details +#' +#' This function returns the raw, the Cholesky standardized and the marginal +#' standardized smoothed model and state residuals. 
'smoothed' means conditioned +#' on all the observed data and a set of parameters. These are the residuals +#' presented in Harvey, Koopman and Penzer (1998) pages 112-113, with the +#' addition of the values for unobserved data (Holmes 2014). If Harvey=TRUE, the +#' function uses the algorithm on page 112 of Harvey, Koopman and Penzer (1998) +#' to compute the conditional residuals and variance of the residuals. If +#' Harvey=FALSE, the function uses the equations in the technical report (Holmes +#' 2014). Unlike the innovations residuals, the smoothed residuals are +#' autocorrelated (section 4.1 in Harvey and Koopman 1992) and thus an ACF test +#' on these residuals would not reveal model inadequacy. +#' +#' The residuals matrix has a value for each time step. The residuals in column +#' \eqn{t} rows 1 to n are the model residuals associated with the data at time +#' \eqn{t}. The residuals in rows n+1 to n+m are the state residuals associated +#' with the transition from \eqn{\mathbf{x}_{t}}{x(t)} to +#' \eqn{\mathbf{x}_{t+1}}{x(t+1)}, not the transition from +#' \eqn{\mathbf{x}_{t-1}}{x(t-1)} to \eqn{\mathbf{x}_{t}}{x(t)}. Because +#' \eqn{\mathbf{x}_{t+1}}{x(t+1)} does not exist at time \eqn{T}, the state +#' residuals and associated variances at time \eqn{T} are NA. +#' +#' Below the conditional residuals and their variance are discussed. The random +#' variables are capitalized and the realizations from the random variables are +#' lower case. The random variables are \eqn{\mathbf{X}}{X}, +#' \eqn{\mathbf{Y}}{Y}, \eqn{\mathbf{V}}{V} and \eqn{\mathbf{W}}{W}. There are +#' two types of \eqn{\mathbf{Y}}{Y}. The observed \eqn{\mathbf{Y}}{Y} that are +#' used to estimate the states \eqn{\mathbf{x}}{x}. These are termed +#' \eqn{\mathbf{Y}^{(1)}}{Y(1)}. The unobserved \eqn{\mathbf{Y}}{Y} are termed +#' \eqn{\mathbf{Y}^{(2)}}{Y(2)}. 
These are not used to estimate the states +#' \eqn{\mathbf{x}}{x} and we may or may not know the values of +#' \eqn{\mathbf{y}^{(2)}}{y(2)}. Typically we treat +#' \eqn{\mathbf{y}^{(2)}}{y(2)} as unknown but it may be known but we did not +#' include it in our model fitting. Note that the model parameters +#' \eqn{\Theta}{Theta} are treated as fixed or known. The 'fitting' does not +#' involve estimating \eqn{\Theta}{Theta}; it involves estimating +#' \eqn{\mathbf{x}}{x}. All MARSS parameters can be time varying but the +#' \eqn{t} subscripts are left off parameters to reduce clutter. +#' +#' **Model residuals** +#' +#' \eqn{\mathbf{v}_{t}}{v(t)} is the difference between the data and the +#' predicted data at time \eqn{t} given \eqn{\mathbf{x}_{t}}{x(t)}: +#' \deqn{ \mathbf{v}_{t} = \mathbf{y}_{t} - \mathbf{Z} \mathbf{x}_{t} - \mathbf{a} - \mathbf{D}\mathbf{d}_t}{ v(t) = y(t) - Z x(t) - a - D d(t)} +#' \eqn{\mathbf{x}_{t}}{x(t)} is unknown (hidden) and our data are one +#' realization of \eqn{\mathbf{y}_{t}}{y(t)}. The observed model residuals +#' \eqn{\hat{\mathbf{v}}_{t}}{hatv(t)} are the difference between the observed +#' data and the predicted data at time \eqn{t} using the fitted model. +#' `MARSSresiduals.tT` fits the model using all the data, thus +#' \deqn{ \hat{\mathbf{v}}_{t} = \mathbf{y}_{t} - \mathbf{Z}\mathbf{x}_{t}^T - \mathbf{a} - \mathbf{D}\mathbf{d}_t}{ hatv(t) = y(t) - Z xtT(t) - a - D d(t)} +#' where \eqn{\mathbf{x}_{t}^T}{xtT(t)} is the expected value of +#' \eqn{\mathbf{X}_{t}}{X(t)} conditioned on the data from 1 to \eqn{T} (all +#' the data), i.e. the Kalman smoother estimate of the states at time \eqn{t}. +#' \eqn{\mathbf{y}_{t}}{y(t)} are your data and missing values will appear as +#' NA in the observed model residuals. These are returned as `model.residuals` +#' and rows 1 to \eqn{n} of `residuals`. +#' +#' `res1` and `res2` in the code below will be the same. 
+#' ``` +#' dat = t(harborSeal)[2:3,] +#' fit = MARSS(dat) +#' Z = coef(fit, type="matrix")$Z +#' A = coef(fit, type="matrix")$A +#' res1 = dat - Z %*% fit$states - A %*% matrix(1,1,ncol(dat)) +#' res2 = MARSSresiduals(fit, type="tT")$model.residuals +#' ``` +#' +#' **State residuals** +#' +#' \eqn{\mathbf{w}_{t+1}}{w(t+1)} are the difference between the state at time +#' \eqn{t+1} and the expected value of the state at time \eqn{t+1} given the +#' state at time \eqn{t}: +#' \deqn{ \mathbf{w}_{t+1} = \mathbf{x}_{t+1} - \mathbf{B} \mathbf{x}_{t} - \mathbf{u} - \mathbf{C}\mathbf{c}_{t+1}}{ w(t+1) = x(t+1) - B x(t) - u - C c(t+1)} +#' The estimated state residuals \eqn{\hat{\mathbf{w}}_{t+1}}{hatw(t+1)} are +#' the estimate of \eqn{\mathbf{x}_{t+1}}{x(t+1)} minus the +#' estimate using \eqn{\mathbf{x}_{t}}{x(t)}. +#' \deqn{ \hat{\mathbf{w}}_{t+1} = \mathbf{x}_{t+1}^T - \mathbf{B}\mathbf{x}_{t}^T - \mathbf{u} - \mathbf{C}\mathbf{c}_{t+1}}{ hatw(t+1) = xtT(t+1) - B xtT(t) - u - C c(t+1)} +#' where \eqn{\mathbf{x}_{t+1}^T}{xtT(t+1)} is the Kalman smoother estimate of +#' the states at time \eqn{t+1} and \eqn{\mathbf{x}_{t}^T}{xtT(t)} is the +#' Kalman smoother estimate of the states at time \eqn{t}. The estimated state +#' residuals \eqn{\hat{\mathbf{w}}_{t+1}}{hatw(t+1)} are returned in `state.residuals` +#' and rows \eqn{n+1} to \eqn{n+m} of `residuals`. `state.residuals[,t]` is +#' \eqn{\hat{\mathbf{w}}_{t+1}}{hatw(t+1)} (notice time subscript difference). There +#' are no NAs in the estimated state residuals as an estimate of the state +#' exists whether or not there are associated data. +#' +#' `res1` and `res2` in the code below will be the same.
+#' ``` +#' dat <- t(harborSeal)[2:3,] +#' TT <- ncol(dat) +#' fit <- MARSS(dat) +#' B <- coef(fit, type="matrix")$B +#' U <- coef(fit, type="matrix")$U +#' statestp1 <- MARSSkf(fit)$xtT[,2:TT] +#' statest <- MARSSkf(fit)$xtT[,1:(TT-1)] +#' res1 <- statestp1 - B %*% statest - U %*% matrix(1,1,TT-1) +#' res2 <- MARSSresiduals(fit, type="tT")$state.residuals[,1:(TT-1)] +#' ``` +#' Note that the state residual at the last time step (not shown) will be NA +#' because it is the residual associated with \eqn{\mathbf{x}_T}{x(T)} to +#' \eqn{\mathbf{x}_{T+1}}{x(T+1)} and \eqn{T+1} is beyond the data. Similarly, +#' the variance matrix at the last time step will have NAs for the same reason. +#' +#' **Variance of the residuals** +#' +#' In a state-space model, \eqn{\mathbf{X}}{X} and \eqn{\mathbf{Y}}{Y} are +#' stochastic, and the model and state residuals are random variables +#' \eqn{\hat{\mathbf{V}}_{t}}{hatV(t)} and +#' \eqn{\hat{\mathbf{W}}_{t+1}}{hatW(t+1)}. To evaluate the residuals we +#' observed (with \eqn{\mathbf{y}^{(1)}}{y(1)}), we use the joint distribution +#' of \eqn{\hat{\mathbf{V}}_{t}, \hat{\mathbf{W}}_{t+1}}{hatV(t), hatW(t+1)} +#' across all the different possible data sets that our MARSS equations with +#' parameters \eqn{\Theta}{Theta} might generate. Denote the matrix of +#' \eqn{\hat{\mathbf{V}}_{t}, \hat{\mathbf{W}}_{t+1}}{hatV(t), hatW(t+1)}, as +#' \eqn{\widehat{\mathcal{E}}_{t}}{Epsilon(t)}. That distribution has an +#' expected value (mean) and variance: +#' \deqn{ \textrm{E}[\widehat{\mathcal{E}}_{t}] = 0; \textrm{var}[\widehat{\mathcal{E}}_{t}] = \hat{\Sigma}_{t} }{ E[Epsilon(t)] = 0; var[Epsilon(t)] = hatSigma(t)} +#' Our observed residuals (returned in `residuals`) are one sample from this +#' distribution. To standardize the observed residuals, we will use +#' \eqn{ \hat{\Sigma}_{t} }{ hatSigma(t) }. \eqn{ \hat{\Sigma}_{t} }{ hatSigma(t) } +#' is returned in `var.residuals`. 
Rows/columns 1 to \eqn{n} are the +#' conditional variances of the model residuals and rows/columns \eqn{n+1} to +#' \eqn{n+m} are the conditional variances of the state residuals. The +#' off-diagonal blocks are the covariances between the two types of residuals. +#' +#' **Standardized residuals** +#' +#' `MARSSresiduals` will return the Cholesky standardized residuals sensu Harvey +#' et al. (1998) in `std.residuals` for outlier and shock detection. These are +#' the model and state residuals multiplied by the inverse of the lower triangle +#' of the Cholesky decomposition of `var.residuals` (note `chol()` in R returns +#' the upper triangle thus a transpose is needed). The standardized model +#' residuals are set to NA when there are missing data. The standardized state +#' residuals however always exist since the expected value of the states exists +#' without data. The calculation of the standardized residuals for both the +#' observations and states requires the full residuals variance matrix. Since +#' the state residuals variance is NA at the last time step, the standardized +#' residual in the last time step will be all NA (for both model and state +#' residuals). +#' +#' The interpretation of the Cholesky standardized residuals is not +#' straight-forward when the \eqn{\mathbf{Q}}{Q} and \eqn{\mathbf{R}}{R} +#' variance-covariance matrices are non-diagonal. The residuals which were +#' generated by non-diagonal variance-covariance matrices are transformed into +#' orthogonal residuals in \eqn{\textrm{MVN}(0,\mathbf{I})}{MVN(0,I)} space. +#' For example, if v is 2x2 correlated errors with variance-covariance matrix +#' \eqn{\mathbf{R}}{R}, the transformed residuals (from this function) for the i-th row of +#' \eqn{\mathbf{v}}{v} is a combination of the row 1 effect and the row 1 +#' effect plus the row 2 effect.
So in this case, row 2 of the transformed +#' residuals would not be regarded as solely the row 2 residual but rather how +#' different row 2 is from row 1, relative to expected. If the errors are highly +#' correlated, then the transformed residuals can look rather non-intuitive. +#' +#' The marginal standardized residuals are returned in `mar.residuals`. These +#' are the model and state residuals multiplied by the inverse of the diagonal +#' matrix formed by the square root of the diagonal of `var.residuals`. These +#' residuals will be correlated (across the residuals at time \eqn{t}) but are +#' easier to interpret when \eqn{\mathbf{Q}}{Q} and \eqn{\mathbf{R}}{R} are +#' non-diagonal. +#' +#' The Block Cholesky standardized residuals are like the Cholesky standardized +#' residuals except that the full variance-covariance matrix is not used, only +#' the variance-covariance matrix for the model or state residuals (respectively) +#' is used for standardization. For the model residuals, the Block Cholesky +#' standardized residuals will be the same as the Cholesky standardized +#' residuals because the upper triangle of the lower triangle of the Cholesky +#' decomposition (which is what we standardize by) is all zero. For the state +#' residuals, the Block Cholesky standardization will be different because Block +#' Cholesky standardization treats the model and state residuals as independent +#' (which they are not in the smoothations case). 
+#' +#' **Normalized residuals** +#' +#' If `normalize=FALSE`, the unconditional variance of \eqn{\mathbf{V}_t}{V(t)} +#' and \eqn{\mathbf{W}_t}{W(t)} are \eqn{\mathbf{R}}{R} and \eqn{\mathbf{Q}}{Q} +#' and the model is assumed to be written as +#' \deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{v}_t}{ y(t) = Z x(t) + a + v(t)} +#' \deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{w}_t}{ x(t) = B x(t-1) + u + w(t)} +#' If normalize=TRUE, the model is assumed to be written +#' \deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{H}\mathbf{v}_t}{ y(t) = Z x(t) + a + Hv(t)} +#' \deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{G}\mathbf{w}_t}{ x(t) = B x(t-1) + u + Gw(t)} +#' with the variance of \eqn{\mathbf{V}_t}{V(t)} and \eqn{\mathbf{W}_t}{W(t)} +#' equal to \eqn{\mathbf{I}}{I} (identity). +#' +#' `MARSSresiduals.tT` returns the residuals defined as in the first equations. +#' To get the residuals defined as Harvey et al. (1998) define them (second +#' equations), then use `normalize=TRUE`. In that case the unconditional +#' variance of residuals will be \eqn{\mathbf{I}}{I} instead of +#' \eqn{\mathbf{Q}}{Q} and \eqn{\mathbf{R}}{R}. +#' +#' **Missing or left-out data** +#' +#' \eqn{ \textrm{E}[\widehat{\mathcal{E}}_{t}] }{ E[Epsilon(t)] } and +#' \eqn{ \textrm{var}[\widehat{\mathcal{E}}_{t}] }{ var[Epsilon(t)] } are for +#' the distribution across all possible \eqn{\mathbf{X}}{X} and +#' \eqn{\mathbf{Y}}{Y}. We can also compute the expected value and variance +#' conditioned on a specific value of \eqn{\mathbf{Y}}{Y}, the one we observed +#' \eqn{\mathbf{y}^{(1)}}{y(1)} (Holmes 2014). If there are no missing values, +#' this is not very interesting as +#' \eqn{\textrm{E}[\hat{\mathbf{V}}_{t}|\mathbf{y}^{(1)}]=\hat{\mathbf{v}}_{t}}{E[hatV(t)|y(1)] = hatv(t)} +#' and +#' \eqn{\textrm{var}[\hat{\mathbf{V}}_{t}|\mathbf{y}^{(1)}] = 0}{var[hatV(t)|y(1)] = 0}. 
+#' If we have data that are missing because we left them out, however, +#' \eqn{\textrm{E}[\hat{\mathbf{V}}_{t}|\mathbf{y}^{(1)}]}{E[hatV(t)|y(1)]} +#' and +#' \eqn{\textrm{var}[\hat{\mathbf{V}}_{t}|\mathbf{y}^{(1)}]}{var[hatV(t)|y(1)]} +#' are the values we need to evaluate whether the left-out data are unusual +#' relative to what you expect given the data you did collect. +#' +#' `E.obs.residuals` is the conditional expected value +#' \eqn{\textrm{E}[\hat{\mathbf{V}}|\mathbf{y}^{(1)}]}{E[hatV(t)|y(1)]} +#' (notice small \eqn{\mathbf{y}}{y}). It is +#' \deqn{\textrm{E}[\mathbf{Y}_{t}|\mathbf{y}^{(1)}] - \mathbf{Z}\mathbf{x}_t^T - \mathbf{a} }{ E[Y(t)|y(1)] - Z xtT(t) - a} +#' It is similar to \eqn{\hat{\mathbf{v}}_{t}}{hatv(t)}. The difference is the +#' \eqn{\mathbf{y}}{y} term. +#' \eqn{\textrm{E}[\mathbf{Y}^{(1)}_{t}|\mathbf{y}^{(1)}] }{ E[Y(1)(t)|y(1)] } +#' is \eqn{\mathbf{y}^{(1)}_{t}}{y(1)(t)} for the non-missing values. For the +#' missing values, the value depends on \eqn{\mathbf{R}}{R}. If +#' \eqn{\mathbf{R}}{R} is diagonal, +#' \eqn{\textrm{E}[\mathbf{Y}^{(2)}_{t}|\mathbf{y}^{(1)}] }{ E[Y(2)(t)|y(1)] } +#' is \eqn{\mathbf{Z}\mathbf{x}_t^T + \mathbf{a}}{Z xtT(t) + a} and the +#' expected residual value is 0. If \eqn{\mathbf{R}}{R} is non-diagonal +#' however, it will be non-zero. +#' +#' `var.obs.residuals` is the conditional variance +#' \eqn{\textrm{var}[\hat{\mathbf{V}}|\mathbf{y}^{(1)}]}{var[hatV(t)|y(1)]} +#' (eqn 24 in Holmes (2014)). For the non-missing values, this variance is 0 +#' since +#' \eqn{\hat{\mathbf{V}}|\mathbf{y}^{(1)}}{hatV(t)|y(1)} is a fixed value. For +#' the missing values, +#' \eqn{\hat{\mathbf{V}}|\mathbf{y}^{(1)}}{hatV(t)|y(1)} is not fixed because +#' \eqn{\mathbf{Y}^{(2)}}{Y(2)} is a random variable. 
For these values, the +#' variance of \eqn{\hat{\mathbf{V}}|\mathbf{y}^{(1)}}{hatV(t)|y(1)} is +#' determined by the variance of \eqn{\mathbf{Y}^{(2)}}{Y(2)} conditioned on +#' \eqn{\mathbf{Y}^{(1)}=\mathbf{y}^{(1)}}{Y(1)=y(1)}. This variance matrix is +#' returned in `var.obs.residuals`. The variance of +#' \eqn{\hat{\mathbf{W}}|\mathbf{y}^{(1)}}{hatW(t)|y(1)} is 0 and thus is not +#' included. +#' +#' The variance +#' \eqn{\textrm{var}[\hat{\mathbf{V}}_{t}|\mathbf{Y}^{(1)}] }{ var[hatV(t)|Y(1)] } +#' (uppercase \eqn{ \mathbf{Y} }{Y}) returned in the 1 to \eqn{n} +#' rows/columns of `var.residuals` may also be of interest depending on what +#' you are investigating with regards to missing values. For example, it may be +#' of interest in a simulation study or cases where you have multiple replicated +#' \eqn{\mathbf{Y}}{Y} data sets. `var.residuals` would allow you to determine +#' if the left-out residuals are unusual with regards to what you would expect +#' for left-out data in that location of the \eqn{\mathbf{Y}}{Y} matrix but not +#' specifically relative to the data you did collect. If \eqn{\mathbf{R}}{R} is +#' non-diagonal and the \eqn{\mathbf{y}^{(1)}}{y(1)} and +#' \eqn{\mathbf{y}^{(2)}}{y(2)} are highly correlated, the variance of +#' \eqn{\textrm{var}[\hat{\mathbf{V}}_{t}|\mathbf{Y}^{(1)}] }{ var[hatV(t)|Y(1)] } +#' and variance of +#' \eqn{\textrm{var}[\hat{\mathbf{V}}_{t}|\mathbf{y}^{(1)}] }{ var[hatV(t)|y(1)] } +#' for the left-out data would be quite different. In the latter, the variance +#' is low because \eqn{\mathbf{y}^{(1)} }{ y(1) } has strong information about +#' \eqn{\mathbf{y}^{(2)} }{ y(2) }. In the former, we integrate over +#' \eqn{\mathbf{Y}^{(1)} }{ Y(1) } and the variance could be high (depending on +#' the parameters). +#' +#' Note, if `Harvey=TRUE` then the rows and columns of `var.residuals` +#' corresponding to missing values will be NA. This is because the Harvey et al. 
+#' algorithm does not compute the residual variance for missing values. +#' +#' @author Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSSresiduals()], [MARSSresiduals.tt1()], [fitted.marssMLE()], [plot.marssMLE()] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[c(2, 11), ] +#' fit <- MARSS(dat) +#' +#' # state residuals +#' state.resids1 <- MARSSresiduals(fit, type = "tT")$state.residuals +#' # this is the same as hatx_t-(hatx_{t-1}+u) +#' states <- fit$states +#' state.resids2 <- states[, 2:30] - states[, 1:29] - matrix(coef(fit, type = "matrix")$U, 2, 29) +#' # compare the two +#' cbind(t(state.resids1[, -30]), t(state.resids2)) +#' +#' # normalize the state residuals to a variance of 1 +#' Q <- coef(fit, type = "matrix")$Q +#' state.resids1 <- MARSSresiduals(fit, type = "tT", normalize = TRUE)$state.residuals +#' state.resids2 <- (solve(t(chol(Q))) %*% state.resids2) +#' cbind(t(state.resids1[, -30]), t(state.resids2)) +#' +#' # Cholesky standardized (by joint variance) model & state residuals +#' MARSSresiduals(fit, type = "tT")$std.residuals +#' +#' # Returns residuals in a data frame in long form +#' residuals(fit, type = "tT") +#' +#' @references +#' Harvey, A., S. J. Koopman, and J. Penzer. 1998. Messy time series: a unified +#' approach. Advances in Econometrics 13: 103-144 (see page 112-113). Equation +#' 21 is the Kalman eqns. Eqn 23 and 24 is the backward recursion to compute +#' the smoothations. This function uses the MARSSkf output for eqn 21 and then +#' implements the backwards recursion in equation 23 and equation 24. Pages +#' 120-134 discuss the use of standardized residuals for outlier and structural +#' break detection. +#' +#' de Jong, P. and J. Penzer. 1998. Diagnosing shocks in time series. Journal +#' of the American Statistical Association 93: 796-806. This one shows the same +#' equations; see eqn 6. 
This paper mentions the scaling based on the inverse of +#' the sqrt (Cholesky decomposition) of the variance-covariance matrix for the +#' residuals (model and state together). This is in the right column, half-way +#' down on page 800. +#' +#' Koopman, S. J., N. Shephard, and J. A. Doornik. 1999. Statistical algorithms +#' for models in state space using SsfPack 2.2. Econometrics Journal 2: 113-166. +#' (see pages 147-148). +#' +#' Harvey, A. and S. J. Koopman. 1992. Diagnostic checking of +#' unobserved-components time series models. Journal of Business & Economic +#' Statistics 4: 377-389. +#' +#' Holmes, E. E. 2014. Computation of standardized residuals for (MARSS) models. +#' Technical Report. arXiv:1411.0045. +#' +#' @export MARSSresiduals.tT <- function(object, Harvey = FALSE, normalize = FALSE, silent = FALSE, fun.kf = c("MARSSkfas", "MARSSkfss")) { # These are the residuals and their variance conditioned on all the data # Harvey=TRUE uses Harvey et al (1998) algorithm to compute these diff --git a/R/MARSSresiduals_tt.R b/R/MARSSresiduals_tt.R index 1f88d63..7e74d7e 100644 --- a/R/MARSSresiduals_tt.R +++ b/R/MARSSresiduals_tt.R @@ -1,3 +1,89 @@ +#' MARSS Contemporaneous Residuals +#' +#' @description +#' Calculates the standardized (or auxiliary) contemporaneous residuals, aka +#' the residuals and their variance conditioned on the data up to time \eqn{t}. +#' Contemporaneous residuals are only for the observations. Not exported. +#' Access this function with `MARSSresiduals(object, type="tt")`. +#' +#' @param object An object of class [marssMLE]. +#' @param method Algorithm to use. Currently only "SS". +#' @param normalize TRUE/FALSE See details. +#' @param silent If TRUE, don't print inversion warnings. +#' @param fun.kf Can be ignored. This will change the Kalman filter/smoother +#' function from the value in `object$fun.kf` if desired. 
+#' +#' @return +#' A list with the following components: +#' +#' * `model.residuals`: The observed contemporaneous model residuals: data minus +#' the model predictions conditioned on the data 1 to t. A n x T matrix. NAs +#' will appear where the data are missing. +#' * `state.residuals`: All NA. There are no contemporaneous residuals for the +#' states. +#' * `residuals`: The residuals. `model.residuals` are in rows 1:n and +#' `state.residuals` are in rows n+1:n+m. +#' * `var.residuals`: The joint variance of the residuals conditioned on +#' observed data from 1 to t. This only has values in the 1:n,1:n upper block +#' for the model residuals. +#' * `std.residuals`: The Cholesky standardized residuals as a n+m x T matrix. +#' Rows n+1:n+m are all NA. +#' * `mar.residuals`: The marginal standardized residuals as a n+m x T matrix. +#' * `bchol.residuals`: Because state residuals do not exist, this will be +#' equivalent to the Cholesky standardized residuals, `std.residuals`. +#' * `E.obs.residuals`: The expected value of the model residuals conditioned on +#' the observed data 1 to t. Returned as a n x T matrix. +#' * `var.obs.residuals`: The variance of the model residuals conditioned on the +#' observed data. Returned as a n x n x T matrix. For observed data, this will +#' be 0. See [MARSSresiduals.tT()] for a discussion. +#' * `msg`: Any warning messages. +#' +#' @details +#' This function returns the conditional expected value (mean) and variance of +#' the model contemporaneous residuals. 'conditional' means conditioned on the +#' observed data up to time \eqn{t} and a set of parameters. 
+#' +#' **Model residuals** +#' +#' \eqn{\mathbf{v}_t}{v(t)} is the difference between the data and the +#' predicted data at time \eqn{t} given \eqn{\mathbf{x}_t}{x(t)}: +#' \deqn{ \mathbf{v}_t = \mathbf{y}_t - \mathbf{Z} \mathbf{x}_t - \mathbf{a} - \mathbf{D}\mathbf{d}_{t}}{ v(t) = y(t) - Z x(t) - a - D d(t)} +#' The observed model residuals use the data up to time \eqn{t}: +#' \deqn{ \hat{\mathbf{v}}_t = \mathbf{y}_t - \mathbf{Z}\mathbf{x}_t^{t} - \mathbf{a} - \mathbf{D}\mathbf{d}_{t}}{ hatv(t) = y(t) - Z xtt(t) - a - D d(t)} +#' +#' The conditional variance is: +#' \deqn{ \hat{\Sigma}_t = \mathbf{R}+\mathbf{Z} \mathbf{V}_t^{t} \mathbf{Z}^\top }{hatSigma(t) = R + Z Vtt t(Z)} +#' +#' **Normalized residuals** +#' +#' If `normalize=FALSE`, the model is: +#' \deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{v}_t}{ y(t) = Z x(t) + a + v(t)} +#' \deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{w}_t}{ x(t) = B x(t-1) + u + w(t)} +#' If `normalize=TRUE`: +#' \deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{H}\mathbf{v}_t}{ y(t) = Z x(t) + a + Hv(t)} +#' \deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{G}\mathbf{w}_t}{ x(t) = B x(t-1) + u + Gw(t)} +#' with the variance of \eqn{\mathbf{V}_t}{V(t)} and \eqn{\mathbf{W}_t}{W(t)} +#' equal to \eqn{\mathbf{I}}{I} (identity). +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSSresiduals.tT()], [MARSSresiduals.tt1()], [fitted.marssMLE()], [plot.marssMLE()] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[c(2, 11), ] +#' fit <- MARSS(dat) +#' +#' # Returns a matrix +#' MARSSresiduals(fit, type = "tt")$std.residuals +#' # Returns a data frame in long form +#' residuals(fit, type = "tt") +#' +#' @references +#' Holmes, E. E. 2014. Computation of standardized residuals for (MARSS) models. +#' Technical Report. arXiv:1411.0045.
+#' @export MARSSresiduals.tt <- function(object, method = c("SS"), normalize = FALSE, silent = FALSE, fun.kf = c("MARSSkfas", "MARSSkfss")) { # These are the residuals and their variance conditioned on the data up to time t # state residuals do not exist for this case diff --git a/R/MARSSresiduals_tt1.R b/R/MARSSresiduals_tt1.R index 0c3e8d3..b41e6a2 100644 --- a/R/MARSSresiduals_tt1.R +++ b/R/MARSSresiduals_tt1.R @@ -1,3 +1,105 @@ +#' MARSS One-Step-Ahead Residuals +#' +#' @description +#' Calculates the standardized (or auxiliary) one-step-ahead residuals, aka the +#' innovations residuals and their variance. Not exported. Access this function +#' with `MARSSresiduals(object, type="tt1")`. To get the residuals as a data +#' frame in long-form, use [residuals()][residuals.marssMLE] with +#' `type="tt1"`. +#' +#' @param object An object of class [marssMLE]. +#' @param method Algorithm to use. Currently only "SS". +#' @param normalize TRUE/FALSE See details. +#' @param silent If TRUE, don't print inversion warnings. +#' @param fun.kf Can be ignored. This will change the Kalman filter/smoother +#' function from the value in `object$fun.kf` if desired. +#' +#' @return +#' A list with the following components: +#' +#' * `model.residuals`: The observed one-step-ahead model residuals: data minus +#' the model predictions conditioned on the data \eqn{t=1} to \eqn{t-1}. +#' These are termed innovations. A n x T matrix. NAs will appear where the +#' data are missing. +#' * `state.residuals`: The one-step-ahead state residuals +#' \eqn{ \mathbf{x}_{t+1}^{t+1} - \mathbf{B}\mathbf{x}_{t}^t - \mathbf{u} }{ xtt(t+1) - B xtt(t) - u}. +#' Note, state residual at time \eqn{t} is the transition from time \eqn{t=t} +#' to \eqn{t+1}. +#' * `residuals`: The residuals conditioned on the observed data up to time +#' \eqn{t-1}. Returned as a (n+m) x T matrix with `model.residuals` in rows +#' 1 to n and `state.residuals` in rows n+1 to n+m. 
NAs will appear in rows +#' 1 to n in the places where data are missing. +#' * `var.residuals`: The joint variance of the one-step-ahead residuals. +#' Returned as a n+m x n+m x T matrix. +#' * `std.residuals`: The Cholesky standardized residuals as a n+m x T matrix. +#' This is `residuals` multiplied by the inverse of the lower triangle of the +#' Cholesky decomposition of `var.residuals`. The model standardized residuals +#' associated with the missing data are replaced with NA. +#' * `mar.residuals`: The marginal standardized residuals as a n+m x T matrix. +#' This is `residuals` multiplied by the inverse of the diagonal matrix formed +#' by the square-root of the diagonal of `var.residuals`. The model marginal +#' residuals associated with the missing data are replaced with NA. +#' * `bchol.residuals`: The Block Cholesky standardized residuals as a (n+m) x +#' T matrix. +#' * `E.obs.residuals`: The expected value of the model residuals conditioned on +#' the observed data \eqn{t=1} to \eqn{t-1}. Returned as a n x T matrix. +#' This will be all 0s. Included for completeness. +#' * `var.obs.residuals`: For one-step-ahead residuals, this will be the same +#' as the 1:n, 1:n upper diagonal block in `var.residuals`. Included for +#' completeness and as a code check. +#' * `msg`: Any warning messages. +#' +#' @details +#' This function returns the conditional expected value (mean) and variance of +#' the one-step-ahead residuals. 'conditional' means conditioned on the observed +#' data up to time \eqn{t-1} and a set of parameters. 
+#' +#' **Model residuals** +#' +#' \eqn{\mathbf{v}_t}{v(t)} is the difference between the data and the predicted +#' data at time \eqn{t} given \eqn{\mathbf{x}_t}{x(t)}: +#' \deqn{ \mathbf{v}_t = \mathbf{y}_t - \mathbf{Z} \mathbf{x}_t - \mathbf{a} - \mathbf{D}\mathbf{d}_t}{ v(t) = y(t) - Z x(t) - a - D d(t)} +#' The observed model residuals \eqn{\hat{\mathbf{v}}_t}{hatv(t)} use the data +#' up to time \eqn{t-1}: +#' \deqn{ \hat{\mathbf{v}}_t = \mathbf{y}_t - \mathbf{Z}\mathbf{x}_t^{t-1} - \mathbf{a} - \mathbf{D}\mathbf{d}_t}{ hatv(t) = y(t) - Z xtt1(t) - a - D d(t)} +#' +#' **State residuals** +#' +#' The estimated state residuals: +#' \deqn{ \hat{\mathbf{w}}_{t+1} = \mathbf{x}_{t+1}^{t+1} - \mathbf{B}\mathbf{x}_{t}^t - \mathbf{u} - \mathbf{C}\mathbf{c}_{t+1}}{ hatw(t+1) = xtt(t+1) - B xtt(t) - u - C c(t+1)} +#' +#' **Normalized residuals** +#' +#' If `normalize=FALSE`, the model is: +#' \deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{v}_t}{ y(t) = Z x(t) + a + v(t)} +#' \deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{w}_t}{ x(t) = B x(t-1) + u + w(t)} +#' If `normalize=TRUE`: +#' \deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{H}\mathbf{v}_t}{ y(t) = Z x(t) + a + Hv(t)} +#' \deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{G}\mathbf{w}_t}{ x(t) = B x(t-1) + u + Gw(t)} +#' with the variance of \eqn{\mathbf{V}_t}{V(t)} and \eqn{\mathbf{W}_t}{W(t)} +#' equal to \eqn{\mathbf{I}}{I} (identity). +#' +#' @author +#' Eli Holmes, NOAA, Seattle, USA. +#' +#' @seealso [MARSSresiduals.tT()], [MARSSresiduals.tt()], [fitted.marssMLE()], [plot.marssMLE()] +#' +#' @examples +#' dat <- t(harborSeal) +#' dat <- dat[c(2, 11), ] +#' fit <- MARSS(dat) +#' +#' MARSSresiduals(fit, type = "tt1")$std.residuals +#' residuals(fit, type = "tt1") +#' +#' @references +#' R. H. Shumway and D. S. Stoffer (2006). 
Section on the calculation of the +#' likelihood of state-space models in Time series analysis and its +#' applications. Springer-Verlag, New York. +#' +#' Holmes, E. E. 2014. Computation of standardized residuals for (MARSS) models. +#' Technical Report. arXiv:1411.0045. +#' @export MARSSresiduals.tt1 <- function(object, method = c("SS"), normalize = FALSE, silent = FALSE, fun.kf = c("MARSSkfas", "MARSSkfss")) { # These are the residuals and their variance conditioned on the data up to time t-1 diff --git a/R/MARSSsimulate.R b/R/MARSSsimulate.R index 3ed7a51..f05e50e 100644 --- a/R/MARSSsimulate.R +++ b/R/MARSSsimulate.R @@ -3,6 +3,58 @@ # Parametrically simulates from a MARSS parameter list # Only works for marss form. marxss form needs to be converted to marss before this will work. ####################################################################################################### +#' Simulate Data from a MARSS Model +#' +#' Generates simulated data from a MARSS model with specified parameter +#' estimates. This is a base function in the [MARSS-package]. +#' +#' Optional argument `miss.loc` is an array of dimensions n x tSteps x nsim, +#' specifying where to put missing values in the simulated data. If missing, +#' this would be constructed using `MLEobj$marss$data`. If the locations of the +#' missing values are the same for all simulations, `miss.loc` can be a matrix +#' of `dim=c(n, tSteps)` (the original data for example). The default, if +#' `miss.loc` is left off, is that there are no missing values even if +#' `MLEobj$marss$data` has missing values. +#' +#' @param object A fitted [marssMLE] object, as output by [MARSS()]. +#' @param tSteps Number of time steps in each simulation. If left off, it is +#' taken to be consistent with `MLEobj`. +#' @param nsim Number of simulated data sets to generate. +#' @param silent Suppresses progress bar. +#' @param miss.loc Optional matrix specifying where to put missing values. See +#' Details. 
+#' +#' @return A list with the following components: +#' +#' * `sim.states`: Array (dim m x tSteps x nsim) of state processes simulated +#' from parameter estimates. m is the number of states (rows in X). +#' * `sim.data`: Array (dim n x tSteps x nsim) of data simulated from parameter +#' estimates. n is the number of rows of data (Y). +#' * `MLEobj`: The [marssMLE] object from which the data were simulated. +#' * `miss.loc`: Matrix identifying where missing values were placed. It should +#' be exactly the same dimensions as the data matrix. The location of NAs in +#' the miss.loc matrix indicate where the missing values are. +#' * `tSteps`: Number of time steps in each simulation. +#' * `nsim`: Number of simulated data sets generated. +#' +#' @author Eli Holmes and Eric Ward, NOAA, Seattle, USA. +#' +#' @seealso [marssMODEL], [marssMLE], [MARSSboot()] +#' +#' @examples +#' d <- harborSeal[, c(2, 11)] +#' dat <- t(d) +#' fit <- MARSS(dat) +#' +#' # simulate data that are the +#' # same length as original data and no missing data +#' sim.obj <- MARSSsimulate(fit, tSteps = dim(d)[1], nsim = 5) +#' +#' # simulate data that are the +#' # same length as original data and have missing data in the same location +#' sim.obj <- MARSSsimulate(fit, tSteps = dim(d)[1], nsim = 5, miss.loc = dat) +#' @export + simulate.marssMLE <- function(object, nsim = 1, seed, ..., tSteps = NULL, silent = TRUE, miss.loc = NULL) { MARSSsimulate(object, tSteps = tSteps, nsim = nsim, silent = silent, miss.loc = miss.loc) } diff --git a/R/MARSSvectorizeparam.R b/R/MARSSvectorizeparam.R index c2ea165..6bafa27 100644 --- a/R/MARSSvectorizeparam.R +++ b/R/MARSSvectorizeparam.R @@ -2,6 +2,38 @@ # MARSSvectorizeparam function # Returns a vector of the ESTIMATED parameters or if vector passed in, that is put into list form for MLEobj$marss ####################################################################################################### +#' Vectorize or Replace the par List +#' +#' Converts 
`MLEobj[[what]]` to a vector or assigns a vector to +#' `MLEobj[[what]]`. This is a utility function in the [MARSS-package] for +#' [marssMODEL] objects of form="marss" and is not exported. Users achieve this +#' functionality with [coef.marssMLE]. +#' +#' Utility function to generate parameter vectors for optimization functions, +#' and to set `MLEobj[[what]]` using a vector of values. The function bases the +#' unlisting and naming order on `names(MLEobj$marss$fixed)`. Appends matrix +#' name to the row names in the par list. +#' +#' @param MLEobj An object of class [marssMLE]. +#' @param parvec NA or a vector. See Value. +#' @param what What part of the MLEobj is being replaced or vectorized. Needs to +#' be a par list. +#' +#' @return If parvec=NA, a vector of the elements of the `what` element. +#' Otherwise, a [marssMLE] object with `MLEobj[[what]]` set by parvec. +#' +#' @author Eli Holmes and Kellie Wills, NOAA, Seattle, USA. +#' +#' @seealso [marssMLE] +#' +#' @examples +#' dat <- t(harborSealWA) +#' dat <- dat[2:4, ] +#' kem <- MARSS(dat) +#' paramvec <- MARSS:::MARSSvectorizeparam(kem) +#' paramvec +#' @keywords internal + MARSSvectorizeparam <- function(MLEobj, parvec = NA, what = "par") { # This helper function ONLY FOR marssMODEL form=marss!! # if parvec=NA) returns a vector version of all the estimated parameters (for use in say optim) from a mssm model diff --git a/Rplots.pdf b/Rplots.pdf deleted file mode 100644 index b237141..0000000 Binary files a/Rplots.pdf and /dev/null differ diff --git a/man/CSEGriskfigure.Rd b/man/CSEGriskfigure.Rd deleted file mode 100644 index 4ddcfe1..0000000 --- a/man/CSEGriskfigure.Rd +++ /dev/null @@ -1,53 +0,0 @@ -\name{CSEGriskfigure} -\alias{CSEGriskfigure} -\keyword{experimental} -\title{ Plot Extinction Risk Metrics } -\description{ - Generates a six-panel plot of extinction risk metrics used in Population Viability Analysis (PVA). 
This is a function used by one of the vignettes in the \code{\link{MARSS-package}}. -} -\usage{ -CSEGriskfigure(data, te = 100, absolutethresh = FALSE, threshold = 0.1, - datalogged = FALSE, silent = FALSE, return.model = FALSE, - CI.method = "hessian", CI.sim = 1000) -} -\arguments{ - \item{data}{ A data matrix with 2 columns; time in first column and counts in second column. Note time is down rows, which is different than the base \code{\link{MARSS-package}} functions.} - \item{te}{ Length of forecast period (positive integer) } - \item{absolutethresh}{ Is extinction threshold an absolute number? (T/F)} - \item{threshold}{ Extinction threshold either as an absolute number, if \code{absolutethresh=TRUE}, or as a fraction of current population count, if \code{absolutethresh=FALSE}. } - \item{datalogged}{ Are the data already logged? (T/F)} - \item{silent}{ Suppress printed output? (T/F) } - \item{return.model}{ Return state-space model as \code{\link{marssMLE}} object? (T/F)} - \item{CI.method}{ Confidence interval method: "hessian", "parametrc", "innovations", or "none". See \code{\link{MARSSparamCIs}}. } - \item{CI.sim}{ Number of simulations for bootstrap confidence intervals (positive integer). } -} -\details{ - Panel 1: Time-series plot of the data. - Panel 2: CDF of extinction risk. - Panel 3: PDF of time to reach threshold. - Panel 4: Probability of reaching different thresholds during forecast period. - Panel 5: Sample projections. - Panel 6: TMU plot (uncertainty as a function of the forecast). -} -\value{ - If \code{return.model=TRUE}, an object of class \code{\link{marssMLE}}. -} -\references{ -Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. NOAA Fisheries, Northwest Fisheries Science -Center, 2725 Montlake Blvd E., Seattle, WA 98112 Type \code{RShowDoc("UserGuide",package="MARSS")} to open a copy. - -(theory behind the figure) Holmes, E. E., J. L. Sabo, S. V. Viscido, and W. F. Fagan. 
(2007) A statistical approach to quasi-extinction forecasting. Ecology Letters 10:1182-1198. - -(CDF and PDF calculations) Dennis, B., P. L. Munholland, and J. M. Scott. (1991) Estimation of growth and extinction parameters for endangered species. Ecological Monographs 61:115-143. - -(TMU figure) Ellner, S. P. and E. E. Holmes. (2008) Resolving the debate on when extinction risk is predictable. Ecology Letters 11:E1-E5. - } -\author{ - Eli Holmes, NOAA, Seattle, USA, and Steve Ellner, Cornell Univ. -} -\seealso{ \code{\link{MARSSboot}}, \code{\link{marssMLE}}, \code{\link{CSEGtmufigure}} } -\examples{ -d <- harborSeal[, 1:2] -kem <- CSEGriskfigure(d, datalogged = TRUE) -} -\keyword{ hplot } diff --git a/man/CSEGtmufigure.Rd b/man/CSEGtmufigure.Rd deleted file mode 100644 index ad56111..0000000 --- a/man/CSEGtmufigure.Rd +++ /dev/null @@ -1,38 +0,0 @@ -\name{CSEGtmufigure} -\alias{CSEGtmufigure} -\keyword{experimental} -\title{ Plot Forecast Uncertainty } -\description{ - Plot the uncertainty in the probability of hitting a percent threshold (quasi-extinction) for a single random walk trajectory. This is the quasi-extinction probability used in Population Viability Analysis. The uncertainty is shown as a function of the forecast, where the forecast is defined in terms of the forecast length (number of time steps) and forecasted decline (percentage). This is a function used by one of the vignettes in the \code{\link{MARSS-package}}. -} -\usage{ -CSEGtmufigure(N = 20, u = -0.1, s2p = 0.01, make.legend = TRUE) -} -\arguments{ - \item{N}{ Time steps between the first and last population data point (positive integer) } - \item{u}{ Per time-step decline (-0.1 means a 10\% decline per time step; 1 means a doubling per time step.) } - \item{s2p}{ Process variance (Q). (a positive number)} - \item{make.legend}{ Add a legend to the plot? (T/F) } -} -\details{ - This figure shows the region of high uncertainty in dark grey. 
In this region, the minimum 95 percent confidence intervals on the probability of quasi-extinction span 80 percent of the 0 to 1 probability. Green hashing indicates where the 95 percent upper bound does not exceed 5\% probability of quasi-extinction. The red hashing indicates, where the 95 percent lower bound is above 95\% probability of quasi-extinction. The light grey lies between these two certain/uncertain extremes. The extinction calculation is based on Dennis et al. (1991). The minimum theoretical confidence interval is based on Fieberg and Ellner (2000). This figure was developed in Ellner and Holmes (2008). - - Examples using this figure are shown in the \href{https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf}{User Guide} in the PVA chapter. -} - -\references{ -Dennis, B., P. L. Munholland, and J. M. Scott. (1991) Estimation of growth and extinction parameters for endangered species. Ecological Monographs 61:115-143. - -Fieberg, J. and Ellner, S.P. (2000) When is it meaningful to estimate an extinction probability? Ecology, 81, 2040-2047. - -Ellner, S. P. and E. E. Holmes. (2008) Resolving the debate on when extinction risk is predictable. Ecology Letters 11:E1-E5. -} -\author{ - Eli Holmes, NOAA, Seattle, USA, and Steve Ellner, Cornell Univ. -} -\seealso{ \code{\link{CSEGriskfigure}} } -\examples{ -CSEGtmufigure(N = 20, u = -0.1, s2p = 0.01) -} -\keyword{ hplot } - diff --git a/man/MARSS.Rd b/man/MARSS.Rd deleted file mode 100644 index 5f652ae..0000000 --- a/man/MARSS.Rd +++ /dev/null @@ -1,198 +0,0 @@ -\name{MARSS} -\alias{MARSS} - -\title{ Fit a MARSS Model via Maximum-Likelihood Estimation } -\description{ - This is the main function for fitting multivariate autoregressive state-space (MARSS) models with linear constraints. Scroll down to the bottom to see some short examples. To open a guide to show you how to get started quickly, type \code{RShowDoc("Quick_Start",package="MARSS")}. 
To open the MARSS User Guide from the command line, type \code{RShowDoc("UserGuide",package="MARSS")}. To get an overview of the package and all its main functions and how to get output (parameter estimates, fitted values, residuals, Kalmin filter or smoother output, or plots), go to \code{\link{MARSS-package}}. If \code{MARSS()} is throwing errors or warnings that you don't understand, try the Troubleshooting section of the user guide or type \code{\link{MARSSinfo}()} at the command line. - -The default MARSS model form is "marxss", which is Multivariate Auto-Regressive(1) eXogenous inputs State-Space model: -\deqn{\mathbf{x}_{t} = \mathbf{B}_t \mathbf{x}_{t-1} + \mathbf{u}_t + \mathbf{C}_t \mathbf{c}_t + \mathbf{G}_t \mathbf{w}_t, \textrm{ where } \mathbf{W}_t \sim \textrm{MVN}(0,\mathbf{Q}_t)}{x(t) = B(t) x(t-1) + u(t) + C(t) c(t) + G(t) w(t), where W(t) ~ MVN(0,Q(t))} -\deqn{\mathbf{y}_t = \mathbf{Z}_t \mathbf{x}_t + \mathbf{a}_t + \mathbf{D}_t \mathbf{d}_t + \mathbf{H}_t \mathbf{v}_t, \textrm{ where } \mathbf{V}_t \sim \textrm{MVN}(0,\mathbf{R}_t)}{y(t) = Z(t) x(t) + a(t) + D(t) d(t) + H(t) v(t), where V(t) ~ MVN(0,R(t))} -\deqn{\mathbf{X}_1 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) \textrm{ or } \mathbf{X}_0 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) }{X(1) ~ MVN(x0, V0) or X(0) ~ MVN(x0, V0) } -The parameters are everything except \eqn{\mathbf{x}}{x}, \eqn{\mathbf{y}}{y}, \eqn{\mathbf{v}}{v}, \eqn{\mathbf{w}}{w}, \eqn{\mathbf{c}}{c} and \eqn{\mathbf{d}}{d}. \eqn{\mathbf{y}}{y} are data (missing values allowed). \eqn{\mathbf{c}}{c} and \eqn{\mathbf{d}}{d} are inputs (no missing values allowed). All parameters (except \eqn{\mathbf{x0}}{x0} and \eqn{\mathbf{V0}}{V0}) can be time-varying but by default, all are time-constant (and the MARSS equation is generally written without the \eqn{t} subscripts on the parameter matrices). All parameters can be zero, including the variance matrices. - -The parameter matrices can have fixed values and linear constraints. 
This is an example of a 3x3 matrix with linear constraints. All matrix elements can be written as a linear function of \eqn{a}, \eqn{b}, and \eqn{c}: -\deqn{\left[\begin{array}{c c c} a+2b & 1 & a\\ 1+3a+b & 0 & b \\ 0 & -2 & c\end{array}\right]}{[a+2b 1 a \\n 1+3a+b 0 b \\n 0 -2 c ]} - -Values such as \eqn{a b} or \eqn{a^2} or \eqn{log(a)} are not linear constraints. - -} -\usage{ -MARSS(y, - model = NULL, - inits = NULL, - miss.value = as.numeric(NA), - method = c("kem", "BFGS", "TMB", "BFGS_TMB", "nlminb_TMB"), - form = c("marxss", "dfa", "marss"), - fit = TRUE, - silent = FALSE, - control = NULL, - fun.kf = c("MARSSkfas", "MARSSkfss"), - ...) -} - -\arguments{ -The default settings for the optional arguments are set in \code{MARSSsettings.R} and are given below in the details section. For form specific defaults see the form help file (e.g. \code{\link{MARSS.marxss}} or \code{\link{MARSS.dfa}}). - \item{y}{ A n x T matrix of n time series over T time steps. Only y is required for the function. A ts object (univariate or multivariate) can be used and this will be converted to a matrix with time in the columns. } - \item{inits}{ A list with the same form as the list outputted by \code{coef(fit)} that specifies initial values for the parameters. See also \code{\link{MARSS.marxss}}. } - \item{model}{ Model specification using a list of parameter matrix text shortcuts or matrices. See Details and \code{\link{MARSS.marxss}} for the default form. Or better yet open the Quick Start Guide \code{RShowDoc("Quick_Start",package="MARSS")}. } - \item{miss.value}{ Deprecated. Denote missing values by NAs in your data. } - \item{method}{ Estimation method. MARSS provides an EM algorithm (\code{method="kem"}) (see \code{\link{MARSSkem}}) and the BFGS algorithm (\code{method="BFGS"}) (see \code{\link{MARSSoptim}}). } - \item{form}{ The equation form used in the \code{MARSS()} call. The default is "marxss". See \code{\link{MARSS.marxss}} or \code{\link{MARSS.dfa}}. 
} - \item{fit}{ TRUE/FALSE Whether to fit the model to the data. If FALSE, a \code{\link{marssMLE}} object with only the model is returned. } - \item{silent}{ Setting to TRUE(1) suppresses printing of full error messages, warnings, progress bars and convergence information. Setting to FALSE(0) produces error output. Setting silent=2 will produce more verbose error messages and progress information. } - \item{fun.kf}{ What Kalman filter function to use. MARSS has two: \code{\link{MARSSkfas}()} which is based on the Kalman filter in the \href{https://cran.r-project.org/package=KFAS}{KFAS} package based on Koopman and Durbin and \code{\link{MARSSkfss}()} which is a native R implementation of the Kalman filter and smoother in Shumway and Stoffer. The KFAS filter is much faster. \code{\link{MARSSkfas}()} modifies the input and output in order to output the lag-one covariance smoother needed for the EM algorithm (per page 321 in Shumway and Stoffer (2000).} - \item{control}{ Estimation options for the maximization algorithm. The typically used control options for method="kem" are below but see \code{\link{marssMLE}} for the full list of control options. Note many of these are not allowed if method="BFGS"; see \code{\link{MARSSoptim}} for the allowed control options for this method. - \describe{ - \item{\code{minit}}{ The minimum number of iterations to do in the maximization routine (if needed by method). If \code{method="kem"}, this is an easy way to up the iterations and see how your estimates are converging. (positive integer)} - \item{\code{maxit}}{ Maximum number of iterations to be used in the maximization routine (if needed by method) (positive integer). } - \item{\code{min.iter.conv.test}}{ Minimum iterations to run before testing convergence via the slope of the log parameter versus log iterations.} - \item{\code{conv.test.deltaT=9}}{ Number of iterations to use for the testing convergence via the slope of the log parameter versus log iterations. 
} - \item{\code{conv.test.slope.tol}}{ The slope of the log parameter versus log iteration to use as the cut-off for convergence. The default is 0.5 which is a bit high. For final analyses, this should be set lower. If you want to only use abstol as your convergence test, then to something very large, for example \code{conv.test.slope.tol=1000}. Type \code{MARSSinfo(11)} to see some comments on when you might want to do this.} - \item{\code{abstol}}{ The logLik.(iter-1)-logLik.(iter) convergence tolerance for the maximization routine. To meet convergence both the abstol and slope tests must be passed.} - \item{\code{allow.degen}}{ Whether to try setting \eqn{\mathbf{Q}}{Q} or \eqn{\mathbf{R}}{R} elements to zero if they appear to be going to zero. } - \item{\code{trace}}{ An integer specifying the level of information recorded and error-checking run during the algorithms. \code{trace=0}, specifies basic error-checking and brief error-messages; \code{trace>0} will print full error messages. In addition if trace>0, the Kalman filter output will be added to the outputted \code{marssMLE} object. Additional information recorded depends on the method of maximization. For the EM algorithm, a record of each parameter estimate for each EM iteration will be added. See \code{\link{optim}} for trace output details for the BFGS method. \code{trace=-1} will turn off most internal error-checking and most error messages. The internal error checks are time expensive so this can speed up model fitting. This is particularly useful for bootstrapping and simulation studies. It is also useful if you get an error saying that \code{MARSS()} stops in \code{\link{MARSSkfss}()} due to a \code{chol()} call. \code{MARSSkfss()} uses matrix inversions and for some models these are unstable (high condition value). \code{MARSSkfss()} is used for error-checks and does not need to be called normally. 
} - \item{\code{safe}}{ Setting \code{safe=TRUE} runs the Kalman smoother after each parameter update rather than running the smoother only once after updated all parameters. The latter is faster but is not a strictly correct EM algorithm. In most cases, \code{safe=FALSE} (default) will not change the fits. If this setting does cause problems, you will know because you will see an error regarding the log-likelihood dropping and it will direct you to set \code{safe=TRUE}. } - } } - \item{...}{ Optional arguments passed to function specified by form. } -} -\details{ -The \code{model} argument specifies the structure of your model. There is a one-to-one correspondence between how you would write your model in matrix form on the whiteboard and how you specify the model for \code{MARSS()}. Many different types of multivariate time-series models can be converted to the MARSS form. See the \href{https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf}{User Guide} and \href{https://cran.r-project.org/package=MARSS/vignettes/Quick_Start.html}{Quick Start Guide} for examples. - -The MARSS package has two forms for standard users: marxss and dfa. -\describe{ -\item{\code{\link{MARSS.marxss}}}{This is the default form. This is a MARSS model with (optional) inputs \eqn{\mathbf{c}_t}{c(t)} or \eqn{\mathbf{d}_t}{d(t)}. Most users will want this help page.} -\item{\code{\link{MARSS.dfa}}}{This is a model form to allow easier specification of models for Dynamic Factor Analysis. The \eqn{\mathbf{Z}}{Z} parameters has a specific form and the \eqn{\mathbf{Q}}{Q} is set at i.i.d (diagonal) with variance of 1.} -} -Those looking to modify or understand the base code, should look at \code{\link{MARSS.marss}} and -\code{\link{MARSS.vectorized}}. These describe the forms used by the base functions. The EM algorithm uses the MARSS model written in vectorized form. This form is what allows linear constraints. 
- -The likelihood surface for MARSS models can be multimodal or with strong ridges. It is recommended that for final analyses the estimates are checked by using a Monte Carlo initial conditions search; see the chapter on initial conditions searches in the User Guide. This requires more computation time, but reduces the chance of the algorithm terminating at a local maximum and not reaching the true MLEs. Also it is wise to check the EM results against the BFGS results (if possible) if there are strong ridges in the likelihood. Such ridges seems to slow down the EM algorithm considerably and can cause the algorithm to report convergence far from the maximum-likelihood values. EM steps up the likelihood and the convergence test is based on the rate of change of the log-likelihood in each step. Once on a strong ridge, the steps can slow dramatically. You can force the algorithm to keep working by setting \code{minit}. BFGS seems less hindered by the ridges but can be prodigiously slow for some multivariate problems. BFGS tends to work better if you give it good initial conditions (see Examples below for how to do this). - -If you are working with models with time-varying parameters, it is important to notice the time-index for the parameters in the process equation (the \eqn{\mathbf{x}}{x} equation). In some formulations (e.g. in \code{\link[KFAS]{KFAS}}), the process equation is \eqn{\mathbf{x}_t=\mathbf{B}_{t-1}\mathbf{x}_{t-1}+\mathbf{w}_{t-1}}{x(t)=B(t-1)x(t-1)+w(t-1)} so \eqn{\mathbf{B}_{t-1}}{B(t-1)} goes with \eqn{\mathbf{x}_t}{x(t)} not \eqn{\mathbf{B}_t}{B(t)}. Thus one needs to be careful to line up the time indices when passing in time-varying parameters to \code{MARSS()}. See the User Guide for examples. -} - -\value{ - An object of class \code{\link{marssMLE}}. 
The structure of this object is discussed below, but if you want to know how to get specific output (like residuals, coefficients, smoothed states, confidence intervals, etc), see \code{\link{print.marssMLE}()}, \code{\link{tidy.marssMLE}()}, \code{\link{MARSSresiduals}()} and \code{\link{plot.marssMLE}()}. - - The outputted \code{\link{marssMLE}} object has the following components: - \item{model}{ MARSS model specification. It is a \code{\link{marssMODEL}} object in the form specified by the user in the \code{MARSS()} call. This is used by print functions so that the user sees the expected form.} - \item{marss}{ The \code{\link{marssMODEL}} object in marss form. This form is needed for all the internal algorithms, thus is a required part of a \code{\link{marssMLE}} object.} - \item{call}{ All the information passed in in the \code{MARSS()} call. } - \item{start}{ List with specifying initial values that were used for each parameter matrix. } - \item{control}{ A list of estimation options, as specified by arguments \code{control}. } - \item{method}{ Estimation method. } - If \code{fit=TRUE}, the following are also added to the \code{\link{marssMLE}} object. - If \code{fit=FALSE}, a \code{\link{marssMLE}} object ready for fitting via the specified \code{method} is returned. - \item{par}{ A list of estimated parameter values in marss form. Use \code{\link[=print.marssMLE]{print}()}, \code{\link[=tidy.marssMLE]{tidy}()} or \code{\link[=coef.marssMLE]{coef}()} for outputing the model estimates in the \code{MARSS()} call (e.g. the default "marxss" form).} - \item{states}{ The expected value of \eqn{\mathbf{X}}{X} conditioned on all the data, i.e. smoothed states. } - \item{states.se}{ The standard errors of the expected value of \eqn{\mathbf{X}}{X}. } - \item{ytT}{ The expected value of \eqn{\mathbf{Y}}{Y} conditioned on all the data. Note this is just \eqn{y} for those \eqn{y} that are not missing. 
} - \item{ytT.se}{ The standard errors of the expected value of \eqn{\mathbf{Y}}{Y}. Note this is 0 for any non-missing \eqn{y}.} - \item{numIter}{ Number of iterations required for convergence. } - \item{convergence}{ Convergence status. 0 means converged successfully, 3 means all parameters were fixed (so model did not need to be fit) and -1 means call was made with \code{fit=FALSE} and parameters were not fixed (thus no \code{$par} element and Kalman filter/smoother cannot be run). Anything else is a warning or error. 2 means the \code{\link{marssMLE}} object has an error; the object is returned so you can debug it. The other numbers are errors during fitting. The error code depends on the fitting method. See \code{\link{MARSSkem}} and \code{\link{MARSSoptim}}. } - \item{logLik}{ Log-likelihood. } - \item{AIC}{ Akaike's Information Criterion. } - \item{AICc}{ Sample size corrected AIC. } -If \code{control$trace} is set to 1 or greater, the following are also added to the \code{\link{marssMLE}} object. - \item{kf}{ A list containing Kalman filter/smoother output from \code{\link{MARSSkf}()}. This is not normally added to a \code{\link{marssMLE}} object since it is verbose, but can be added using \code{\link{MARSSkf}()}.} - \item{Ey}{ A list containing output from \code{\link{MARSShatyt}}. This isn't normally added to a \code{\link{marssMLE}} object since it is verbose, but can be computed using \code{\link{MARSShatyt}()}.} -} -\references{ -The MARSS User Guide: Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. NOAA Fisheries, Northwest Fisheries Science -Center, 2725 Montlake Blvd E., Seattle, WA 98112 Type \code{RShowDoc("UserGuide",package="MARSS")} to open a copy. - -Holmes, E. E. (2012). Derivation of the EM algorithm for constrained and unconstrained multivariate autoregressive state-space (MARSS) models. Technical Report. arXiv:1302.3919 [stat.ME] - -Holmes, E. E., E. J. Ward and K. Wills. 
(2012) MARSS: Multivariate autoregressive state-space models for analyzing time-series data. R Journal 4: 11-19. -} -\author{ - Eli Holmes, Eric Ward and Kellie Wills, NOAA, Seattle, USA. -} -\seealso{ -\code{\link{marssMLE}}, \code{\link{MARSSkem}()}, \code{\link{MARSSoptim}()}, \code{\link{MARSSkf}()}, \code{\link{MARSS-package}}, \code{\link{print.marssMLE}()}, \code{\link{plot.marssMLE}()}, \code{\link{print.marssMODEL}()}, \code{\link{MARSS.marxss}()}, \code{\link{MARSS.dfa}()}, \code{\link[=fitted.marssMLE]{fitted}()}, \code{\link[=residuals.marssMLE]{residuals}()}, \code{\link{MARSSresiduals}()}, \code{\link[=predict.marssMLE]{predict}()}, \code{\link[=tsSmooth.marssMLE]{tsSmooth}()}, -\code{\link[=tidy.marssMLE]{tidy}()}, \code{\link[=coef.marssMLE]{coef}()} -} -\examples{ -dat <- t(harborSealWA) -dat <- dat[2:4, ] # remove the year row -# fit a model with 1 hidden state and 3 observation time series -kemfit <- MARSS(dat, model = list( - Z = matrix(1, 3, 1), - R = "diagonal and equal" -)) -kemfit$model # This gives a description of the model -print(kemfit$model) # same as kemfit$model -summary(kemfit$model) # This shows the model structure - -# add CIs to a marssMLE object -# default uses an estimated Hessian matrix -kem.with.hess.CIs <- MARSSparamCIs(kemfit) -kem.with.hess.CIs - -# fit a model with 3 hidden states (default) -kemfit <- MARSS(dat, silent = TRUE) # suppress printing -kemfit - -# Fit the above model with BFGS using a short EM fit as initial conditions -kemfit <- MARSS(dat, control=list(minit=5, maxit=5)) -bffit <- MARSS(dat, method="BFGS", inits=kemfit) - -# fit a model with 3 correlated hidden states -# with one variance and one covariance -# maxit set low to speed up example, but more iters are needed for convergence -kemfit <- MARSS(dat, model = list(Q = "equalvarcov"), control = list(maxit = 50)) -# use Q="unconstrained" to allow different variances and covariances - -# fit a model with 3 independent hidden states -# where each observation 
time series is independent -# the hidden trajectories 2-3 share their U parameter -kemfit <- MARSS(dat, model = list(U = matrix(c("N", "S", "S"), 3, 1))) - -# same model, but with fixed independent observation errors -# and the 3rd x process is forced to have U=0 -# Notice how a list matrix is used to combine fixed and estimated elements -# all parameters can be specified in this way using list matrices -kemfit <- MARSS(dat, model = list(U = matrix(list("N", "N", 0), 3, 1), R = diag(0.01, 3))) - -# fit a model with 2 hidden states (north and south) -# where observation time series 1-2 are north and 3 is south -# Make the hidden state process independent with same process var -# Make the observation errors different but independent -# Make the growth parameters (U) the same -# Create a Z matrix as a design matrix that assigns the "N" state to the first 2 rows of dat -# and the "S" state to the 3rd row of data -Z <- matrix(c(1, 1, 0, 0, 0, 1), 3, 2) -# You can use factor as a shortcut for making the above design matrix for Z -# Z <- factor(c("N","N","S")) -# name the state vectors -colnames(Z) <- c("N", "S") -kemfit <- MARSS(dat, model = list( - Z = Z, - Q = "diagonal and equal", R = "diagonal and unequal", U = "equal" -)) - -# print the model followed by the marssMLE object -kemfit$model - -\dontrun{ -# simulate some new data from our fitted model -sim.data <- MARSSsimulate(kemfit, nsim = 10, tSteps = 10) - -# Compute bootstrap AIC for the model; this takes a long, long time -kemfit.with.AICb <- MARSSaic(kemfit, output = "AICbp") -kemfit.with.AICb -} - -\dontrun{ -# Many more short examples can be found in the -# Quick Examples chapter in the User Guide -RShowDoc("UserGuide", package = "MARSS") - -# You can find the R scripts from the chapters by -# going to the index page -RShowDoc("index", package = "MARSS") -} - -} - diff --git a/man/MARSSFisherI.Rd b/man/MARSSFisherI.Rd deleted file mode 100644 index a66dbac..0000000 --- a/man/MARSSFisherI.Rd +++ /dev/null @@
-1,63 +0,0 @@ -\name{MARSSFisherI} -\alias{MARSSFisherI} - -\title{ Observed Fisher Information Matrix at the MLE } -\description{ -Returns the observed Fisher Information matrix for a \code{\link{marssMLE}} object (a fitted MARSS model) via either the analytical algorithm of Harvey (1989) or a numerical estimate. - -The observed Fisher Information is the negative of the second-order partial derivatives of the log-likelihood function evaluated at the MLE. The derivatives being with respect to the parameters. The Hessian matrix is the second-order partial derivatives of a scalar-valued function. Thus the observed Fisher Information matrix is the Hessian of the negative log-likelihood function evaluated at the MLE (or equivalently the negative of the Hessian of the log-likelihood function). The inverse of the observed Fisher Information matrix is an estimate of the asymptotic variance-covariance matrix for the estimated parameters. Use \code{\link{MARSShessian}()} (which calls \code{MARSSFisherI()}) to return the parameter variance-covariance matrix computed from the observed Fisher Information matrix. - -Note for the numerically estimated Hessian, we pass in the negative log-likelihood function to a minimization function. As a result, the numerical functions return the Hessian of the negative log-likelihood function (which is the observed Fisher Information matrix). -} -\usage{ -MARSSFisherI(MLEobj, method = c("Harvey1989", "fdHess", "optim")) -} -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. - This object must have a \code{$par} element containing MLE parameter estimates from e.g. \code{\link{MARSSkem}()}. } - \item{method}{ The method to use for computing the observed Fisher Information matrix. Options are \code{"Harvey1989"} to use the Harvey (1989) recursion, which is an analytical solution, \code{"fdHess"} or \code{"optim"} which are two numerical methods. 
Although 'optim' can be passed to the function, 'fdHess' is used for all numerical estimates used in the MARSS package.} - } -\details{ - Method 'fdHess' uses \code{\link{fdHess}()} from package \code{\link{nlme}} to numerically estimate the Hessian of the negative log-likelihood function at the MLEs. Method 'optim' uses \code{\link{optim}()} with \code{hessian=TRUE} and \code{list(maxit=0)} to ensure that the Hessian is computed at the values in the \code{par} element of the MLE object. The \code{par} element of the \code{\link{marssMLE}} object is the MLE. - -Method 'Harvey1989' (the default) uses the recursion in Harvey (1989) to compute the observed Fisher Information of a MARSS model analytically. See Holmes (2016c) for a discussion of the Harvey (1989) algorithm and see Holmes (2017) on how to implement the algorithm for MARSS models with linear constraints (the type of MARSS models that the MARSS R package addresses). - -There has been research on computing the observed Fisher Information matrix from the derivatives used by EM algorithms (discussed in Holmes (2016a, 2016b)), for example Louis (1982). Unfortunately, the EM algorithm used in the MARSS package is for time series data and the temporal correlation must be dealt with, e.g. Duan & Fulop (2011). Oakes (1999) has an approach that only involves derivatives of \eqn{\textrm{E}[LL(\Theta)|\mathbf{y},\Theta']}{E(LL(Theta)|data, Theta')} but one of the derivatives will be the derivative of the \eqn{\textrm{E}[\mathbf{X}|\mathbf{y},\Theta']}{E(X|data, Theta')} with respect to \eqn{\Theta'}{Theta'}. It is not clear how to do that derivative. Moon-Ho, Shumway and Ombao (2006) suggest (page 157) that this derivative is hard to compute. - -} -\value{ - Returns the observed Fisher Information matrix. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. 
-} -\seealso{ \code{\link{MARSSharveyobsFI}()}, \code{\link{MARSShessian.numerical}}, \code{\link{MARSSparamCIs}}, \code{\link{marssMLE}} } -\examples{ -dat <- t(harborSeal) -dat <- dat[2:4, ] -MLEobj <- MARSS(dat, model=list(Z=matrix(1,3,1), R="diagonal and equal")) -MARSSFisherI(MLEobj) -MARSSFisherI(MLEobj, method="fdHess") - -} -\references{ -Harvey, A. C. (1989) Section 3.4.5 (Information matrix) in Forecasting, structural time series models and the Kalman filter. Cambridge University Press, Cambridge, UK. - -See also J. E. Cavanaugh and R. H. Shumway (1996) On computing the expected Fisher information matrix for state-space model parameters. Statistics & Probability Letters 26: 347-355. This paper discusses the Harvey (1989) recursion (and proposes an alternative). - -Holmes, E. E. 2016a. Notes on computing the Fisher Information matrix for MARSS models. Part I Background. Technical Report. https://doi.org/10.13140/RG.2.2.27306.11204/1 \href{https://eeholmes.github.io/posts/2016-5-18-FI-recursion-1/}{Notes} - -Holmes, E. E. 2016b. Notes on computing the Fisher Information matrix for MARSS models. Part II Louis 1982. Technical Report. https://doi.org/10.13140/RG.2.2.35694.72000 \href{https://eeholmes.github.io/posts/2016-5-19-FI-recursion-2/}{Notes} - -Holmes, E. E. 2016c. Notes on computing the Fisher Information matrix for MARSS models. Part III Overview of Harvey 1989. https://eeholmes.github.io/posts/2016-6-16-FI-recursion-3/ - -Holmes, E. E. 2017. Notes on computing the Fisher Information matrix for MARSS models. Part IV Implementing the Recursion in Harvey 1989. https://eeholmes.github.io/posts/2017-5-31-FI-recursion-4/ - -Duan, J. C. and A. Fulop. (2011) A stable estimator of the information matrix under EM for dependent data. Statistics and Computing 21: 83-91 - -Louis, T. A. 1982. Finding the observed information matrix when using the EM algorithm. Journal of the Royal Statistical Society. Series B (Methodological). 44: 226-233. - -Oakes, D. 1999. 
Direct calculation of the information matrix via the EM algorithm. Journal of the Royal Statistical Society. Series B (Methodological). 61: 479-482. - -Moon-Ho, R. H., R. H. Shumway, and Ombao 2006. The state-space approach to modeling dynamic processes. Chapter 7 in Models for Intensive Longitudinal Data. Oxford University Press. -} diff --git a/man/MARSS_dfa.Rd b/man/MARSS_dfa.Rd deleted file mode 100644 index e92c71f..0000000 --- a/man/MARSS_dfa.Rd +++ /dev/null @@ -1,83 +0,0 @@ -\name{MARSS.dfa} -\alias{MARSS.dfa} - -\title{ Multivariate Dynamic Factor Analysis } - -\description{ -The Dynamic Factor Analysis model in MARSS is -The argument \code{form="marxss"} in a \code{\link{MARSS}()} function call specifies a MAR-1 model with eXogenous variables model. This is a MARSS(1) model of the form: -\deqn{\mathbf{x}_{t} = \mathbf{x}_{t-1} + \mathbf{w}_t, \textrm{ where } \mathbf{W}_t \sim \textrm{MVN}(0,\mathbf{I})}{x(t) = x(t-1) + w(t), where W(t) ~ MVN(0,I)} -\deqn{\mathbf{y}_t = \mathbf{Z}_t \mathbf{x}_t + \mathbf{D}_t \mathbf{d}_t + \mathbf{v}_t, \textrm{ where } \mathbf{V}_t \sim \textrm{MVN}(0,\mathbf{R}_t)}{y(t) = Z(t) x(t) + D(t) d(t) + v(t), where V(t) ~ MVN(0,R(t))} -\deqn{\mathbf{X}_1 \sim \textrm{MVN}(\mathbf{x0}, 5\mathbf{I})}{X(1) ~ MVN(x0, 5I) } -Note, by default \eqn{\mathbf{x}_1}{x(1)} is treated as a diffuse prior. - -Passing in \code{form="dfa"} to \code{\link{MARSS}()} invokes a helper function to create that model and creates the \eqn{\mathbf{Z}}{Z} matrix for the user. \eqn{\mathbf{Q}}{Q} is by definition identity, \eqn{\mathbf{x}_0}{x0} is zero and \eqn{\mathbf{V_0}}{V0} is diagonal with large variance (5). \eqn{\mathbf{u}}{U} is zero, \eqn{\mathbf{a}}{A} is zero, and covariates only enter the \eqn{\mathbf{y}}{Y} equation. Because \eqn{\mathbf{u}}{U} and \eqn{\mathbf{a}}{A} are 0, the data should have mean 0 (demeaned) otherwise one is likely to be creating a structurally inadequate model (i.e. 
the model implies that the data have mean = 0, yet data do not have mean = 0). -} -\section{Usage}{ -\code{MARSS(y, - inits = NULL, - model = NULL, - miss.value = as.numeric(NA), - method = "kem", - form = "dfa", - fit = TRUE, - silent = FALSE, - control = NULL, - fun.kf = "MARSSkfas", - demean = TRUE, - z.score = TRUE)} -} - -\arguments{ -Some arguments are common to all forms: "y" (data), "inits", "control", "method", "form", "fit", "silent", "fun.kf". See \code{\link{MARSS}} for information on these arguments. - -In addition to these, form="dfa" has some special arguments that can be passed in: -\itemize{ - \item \code{demean} Logical. Default is TRUE, which means the data will be demeaned. - \item \code{z.score} Logical. Default is TRUE, which means the data will be z-scored (demeaned and variance standardized to 1). - \item \code{covariates} Covariates (\eqn{d}) for the \eqn{y} equation. No missing values allowed and must be a matrix with the same number of time steps as the data. An unconstrained \eqn{D} matrix will be estimated. - } - -The \code{model} argument of the \code{\link{MARSS}()} call is constrained in terms of what parameters can be changed and how they can be changed. See details below. An additional element, \code{m}, can be passed into the \code{model} argument that specifies the number of hidden state variables. It is not necessary for the user to specify \code{Z} as the helper function will create a \code{Z} appropriate for a DFA model. -} -\details{ -The \code{model} argument is a list. The following details what list elements can be passed in: - \itemize{ - \item \code{B} "Identity". The standard (and default) DFA model has B="identity". However, it can be "identity", "diagonal and equal", "diagonal and unequal" or a time-varying fixed or estimated diagonal matrix. - \item \code{U} "Zero". Cannot be changed or passed in via model argument. - \item \code{Q} "Identity". The standard (and default) DFA model has Q="identity".
However, it can be "identity", "diagonal and equal", "diagonal and unequal" or a time-varying fixed or estimated diagonal matrix. - \item \code{Z} Can be passed in as a (list) matrix if the user does not want a default DFA \code{Z} matrix. There are many equivalent ways to construct a DFA \code{Z} matrix. The default is Zuur et al.'s form (see User Guide). - \item \code{A} Default="zero". Can be "unequal", "zero" or a matrix. - \item \code{R} Default="diagonal and equal". Can be set to "identity", "zero", "unconstrained", "diagonal and unequal", "diagonal and equal", "equalvarcov", or a (list) matrix to specify general forms. - \item \code{x0} Default="zero". Can be "unconstrained", "unequal", "zero", or a (list) matrix. - \item \code{V0} Default=diagonal matrix with 5 on the diagonal. Can be "identity", "zero", or a matrix. - \item \code{tinitx} Default=0. Can be 0 or 1. Tells MARSS whether x0 is at t=0 or t=1. - \item \code{m} Default=1. Can be 1 to n (the number of y time-series). Must be integer. - } - -See the \href{https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf}{User Guide} chapter on Dynamic Factor Analysis for examples of using \code{form="dfa"}. -} -\value{ -An object of class \code{\link{marssMLE}}. See \code{\link[=print.marssMLE]{print}()} for a discussion of the various output available for \code{\link{marssMLE}} objects (coefficients, residuals, Kalman filter and smoother output, imputed values for missing data, etc.). See \code{\link{MARSSsimulate}()} for simulating from \code{\link{marssMLE}} objects, \code{\link{MARSSboot}()} for bootstrapping, \code{\link{MARSSaic}()} for calculation of various AIC related model selection metrics, and \code{\link{MARSSparamCIs}()} for calculation of confidence intervals and bias. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. - } - -\seealso{ \code{\link{MARSS}()}, \code{\link{MARSS.marxss}()} } - -\references{ -The MARSS User Guide: Holmes, E. E., E. J. Ward, and M. D.
Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. NOAA Fisheries, Northwest Fisheries Science -Center, 2725 Montlake Blvd E., Seattle, WA 98112 Type \code{RShowDoc("UserGuide",package="MARSS")} to open a copy. -} -\examples{ -\dontrun{ -dat <- t(harborSealWA[,-1]) -# DFA with 3 states; used BFGS because it fits much faster for this model -fit <- MARSS(dat, model = list(m=3), form="dfa", method="BFGS") - -# See the Dynamic Factor Analysis chapter in the User Guide -RShowDoc("UserGuide", package = "MARSS") -} -} - diff --git a/man/MARSS_marss.Rd b/man/MARSS_marss.Rd deleted file mode 100644 index 73cbe69..0000000 --- a/man/MARSS_marss.Rd +++ /dev/null @@ -1,51 +0,0 @@ -\name{MARSS.marss} -\alias{MARSS.marss} -\keyword{appendix} - - -\title{ Multivariate AR-1 State-space Model } -\description{ -The form of MARSS models for users is "marxss", the MARSS models with inputs. See \code{\link{MARSS.marxss}}. In the internal algorithms (e.g. \code{\link{MARSSkem}}), the "marss" form is used and the \eqn{\mathbf{D}\mathbf{d}_t}{Dd(t)} are incorporated into the \eqn{\mathbf{a}_t}{a(t)} matrix and \eqn{\mathbf{C}\mathbf{c}_t}{Cc(t)} are incorporated into the \eqn{\mathbf{u}_t}{u(t)}. The \eqn{\mathbf{a}}{a} and \eqn{\mathbf{u}}{u} matrices then become time-varying if the model includes \eqn{\mathbf{d}_t}{d(t)} and \eqn{\mathbf{c}_t}{c(t)}. 
- -This is a MARSS(1) model of the marss form: -\deqn{\mathbf{x}_{t} = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u}_t + \mathbf{G} \mathbf{w}_t, \textrm{ where } \mathbf{W}_t \sim \textrm{MVN}(0,\mathbf{Q})}{x(t) = B x(t-1) + u(t) + G w(t), where W(t) ~ MVN(0,Q)} -\deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a}_t + \mathbf{H} \mathbf{v}_t, \textrm{ where } \mathbf{V}_t \sim \textrm{MVN}(0,\mathbf{R})}{y(t) = Z x(t) + a(t) + H v(t), where V(t) ~ MVN(0,R)} -\deqn{\mathbf{X}_1 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) \textrm{ or } \mathbf{X}_0 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) }{X(1) ~ MVN(x0, V0) or X(0) ~ MVN(x0, V0) }Note, by default \eqn{\mathbf{V0}}{V0} is a matrix of all zeros and thus \eqn{\mathbf{x}_1}{x(1)} or \eqn{\mathbf{x}_0}{x(0)} is treated as an estimated parameter not a diffuse prior. To remove clutter, the rest of the parameters are shown as time-constant (no \eqn{t} subscript) but all parameters can be time-varying. - - -Note, "marss" is a model form. A model form is defined by a collection of form functions discussed in \code{\link{marssMODEL}}. These functions are not exported to the user, but are called by \code{\link{MARSS}()} using the argument \code{form}. These internal functions convert the user's model list into the vec form of a MARSS model and do extensive error-checking. -} -\section{Usage}{ -\code{MARSS(y, - inits = NULL, - model = NULL, - miss.value = as.numeric(NA), - method = "kem", - form = "marxss", - fit = TRUE, - silent = FALSE, - control = NULL, - fun.kf = "MARSSkfas", - ...)} -} - -\details{ -See the help page for the \code{\link{MARSS.marxss}} form for details. -} -\value{ -An object of class \code{\link{marssMLE}}. -} -\author{ - Eli Holmes, NOAA, Seattle, USA.
- - } -\seealso{ \code{\link{marssMODEL}}, \code{\link{MARSS.marxss}()} } -\examples{ -\dontrun{ -# See the MARSS man page for examples -?MARSS - -# and the Quick Examples chapter in the User Guide -RShowDoc("UserGuide", package = "MARSS") -} -} diff --git a/man/MARSS_marxss.Rd b/man/MARSS_marxss.Rd deleted file mode 100644 index 7b3c616..0000000 --- a/man/MARSS_marxss.Rd +++ /dev/null @@ -1,78 +0,0 @@ -\name{MARSS.marxss} -\alias{MARSS.marxss} -\keyword{appendix} - -\title{ Multivariate AR-1 State-space Model with Inputs } -\description{ -The argument \code{form="marxss"} in a \code{\link{MARSS}()} function call specifies a MAR-1 model with eXogenous variables model. This is a MARSS(1) model of the form: -\deqn{\mathbf{x}_{t} = \mathbf{B}_t \mathbf{x}_{t-1} + \mathbf{u}_t + \mathbf{C}_t \mathbf{c}_t + \mathbf{G}_t \mathbf{w}_t, \textrm{ where } \mathbf{W}_t \sim \textrm{MVN}(0,\mathbf{Q}_t)}{x(t) = B(t) x(t-1) + u(t) + C(t) c(t) + G(t) w(t), where W(t) ~ MVN(0,Q(t))} -\deqn{\mathbf{y}_t = \mathbf{Z}_t \mathbf{x}_t + \mathbf{a}_t + \mathbf{D}_t \mathbf{d}_t + \mathbf{H}_t \mathbf{v}_t, \textrm{ where } \mathbf{V}_t \sim \textrm{MVN}(0,\mathbf{R}_t)}{y(t) = Z(t) x(t) + a(t) + D(t) d(t) + H(t) v(t), where V(t) ~ MVN(0,R(t))} -\deqn{\mathbf{X}_1 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) \textrm{ or } \mathbf{X}_0 \sim \textrm{MVN}(\mathbf{x0}, \mathbf{V0}) }{X(1) ~ MVN(x0, V0) or X(0) ~ MVN(x0, V0) } -Note, by default \eqn{\mathbf{V0}}{V0} is a matrix of all zeros and thus \eqn{\mathbf{x}_1}{x(1)} or \eqn{\mathbf{x}_0}{x(0)} is treated as an estimated parameter not a diffuse prior. - -Note, "marxss" is a model form. A model form is defined by a collection of form functions discussed in \code{\link{marssMODEL}}. These functions are not exported to the user, but are called by \code{\link{MARSS}()} using the argument \code{form}. 
-} -\section{Usage}{ -\code{MARSS(y, - inits = NULL, - model = NULL, - miss.value = as.numeric(NA), - method = "kem", - form = "marxss", - fit = TRUE, - silent = FALSE, - control = NULL, - fun.kf = "MARSSkfas", - ...)} -} - -\details{ -The allowed arguments when \code{form="marxss"} are 1) the arguments common to all forms: "y" (data), "inits", "control", "method", "form", "fit", "silent", "fun.kf" (see \code{\link{MARSS}} for information on these arguments) and 2) the argument "model" which is a list describing the MARXSS model (the model list is described below). -See the \href{https://cran.r-project.org/package=MARSS/vignettes/Quick_Start.html}{Quick Start Guide} guide or the \href{https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf}{User Guide} for examples. - -The argument \code{model} must be a list. The elements in the list specify the structure for the \eqn{\mathbf{B}}{B}, \eqn{\mathbf{u}}{u}, \eqn{\mathbf{C}}{C}, \eqn{\mathbf{c}}{c}, \eqn{\mathbf{Q}}{Q}, \eqn{\mathbf{Z}}{Z}, \eqn{\mathbf{a}}{a}, \eqn{\mathbf{D}}{D}, \eqn{\mathbf{d}}{d}, \eqn{\mathbf{R}}{R}, \eqn{\mathbf{x}_0}{x0}, and \eqn{\mathbf{V}_0}{V0} in the MARXSS model (above). The list elements can have the following values: -\describe{ - \item{\code{Z}}{Default="identity". A text string, "identity","unconstrained", "diagonal and unequal", "diagonal and equal", "equalvarcov", or "onestate", or a length n vector of factors specifying which of the m hidden state time series correspond to which of the n observation time series. May be specified as a n x m list matrix for general specification of both fixed and shared elements within the matrix. May also be specified as a numeric n x m matrix to use a custom fixed \eqn{\mathbf{Z}}{Z}. "onestate" gives a n x 1 matrix of 1s. "identity","unconstrained", "diagonal and unequal", "diagonal and equal", and "equalvarcov" all specify n x n matrices.} - \item{\code{B}}{Default="identity". 
A text string, "identity", "unconstrained", "diagonal and unequal", "diagonal and equal", "equalvarcov", "zero". Can also be specified as a list matrix for general specification of both fixed and shared elements within the matrix. May also be specified as a numeric m x m matrix to use custom fixed \eqn{\mathbf{B}}{B}, but in this case all the eigenvalues of \eqn{\mathbf{B}}{B} must fall in the unit circle.} - \item{\code{U}, \code{x0}}{Default="unconstrained". A text string, "unconstrained", "equal", "unequal" or "zero". May be specified as a m x 1 list matrix for general specification of both fixed and shared elements within the matrix. May also be specified as a numeric m x 1 matrix to use a custom fixed \eqn{\mathbf{u}}{u} or \eqn{\mathbf{x}_0}{x(0)}. Notice that \code{U} is capitalized in the \code{model} argument and output lists.} - \item{\code{A}}{Default="scaling". A text string, "scaling","unconstrained", "equal", "unequal" or "zero". May be specified as a n x 1 list matrix for general specification of both fixed and shared elements within the matrix. May also be specified as a numeric n x 1 matrix to use a custom fixed \eqn{\mathbf{a}}{a}. Care must be taken when specifying \code{A} so that the model is not under-constrained and unsolvable. The default, "scaling", only applies to \eqn{\mathbf{Z}}{Z} matrices that are design matrices (only 1s and 0s and all rows sum to 1). When a column in \eqn{\mathbf{Z}}{Z} has multiple 1s, the first row in the \eqn{\mathbf{a}}{a} matrix associated with those \eqn{\mathbf{Z}}{Z} rows is 0 and the other associated \eqn{\mathbf{a}}{a} rows have an estimated value. This is used to treat \eqn{\mathbf{a}}{a} as an intercept where one intercept for each \eqn{\mathbf{x}}{x} (hidden state) is fixed at 0 and any other intercepts associated with that \eqn{\mathbf{x}}{x} have an estimated intercept. This ensures a solvable model when \eqn{\mathbf{Z}}{Z} is a design matrix.
Note in the model argument and output, \code{A} is capitalized.} - \item{\code{Q}}{Default="diagonal and unequal". A text string, "identity", "unconstrained", "diagonal and unequal", "diagonal and equal", "equalvarcov", "zero". May be specified as a list matrix for general specification of both fixed and shared elements within the matrix. May also be specified as a numeric g x g matrix to use a custom fixed matrix. Default value of g is m, so \eqn{\mathbf{Q}}{Q} is a m x m matrix. g is the number of columns in \eqn{\mathbf{G}}{G} (below).} - \item{\code{R}}{Default="diagonal and equal". A text string, "identity", "unconstrained", "diagonal and unequal", "diagonal and equal", "equalvarcov", "zero". May be specified as a list matrix for general specification of both fixed and shared elements within the matrix. May also be specified as a numeric h x h matrix to use a custom fixed matrix. Default value of h is n, so \eqn{\mathbf{R}}{R} is a n x n matrix. h is the num of columns in \eqn{\mathbf{H}}{H} (below).} - \item{\code{V0}}{Default="zero". A text string, "identity", "unconstrained", "diagonal and unequal", "diagonal and equal", "equalvarcov", "zero". May be specified as a list matrix for general specification of both fixed and shared elements within the matrix. May also be specified as a numeric m x m matrix to use a custom fixed matrix.} - \item{\code{D} and \code{C}}{Default="zero". A text string, "identity", "unconstrained", "diagonal and unequal", "diagonal and equal", "equalvarcov", "zero". Can be specified as a list matrix for general specification of both fixed and shared elements within the matrix. May also be specified as a numeric matrix to use custom fixed values. Must have n rows (\eqn{\mathbf{D}}{D}) or m rows (\eqn{\mathbf{C}}{C}).} - \item{\code{d} and \code{c}}{Default="zero". Numeric matrix. No missing values allowed. Must have 1 column or the same number of columns as the data, \eqn{\mathbf{y}}{y}. 
The numbers of rows in \eqn{\mathbf{d}}{d} must be the same as number of columns in \eqn{\mathbf{D}}{D}; similarly for \eqn{\mathbf{c}}{c} and \eqn{\mathbf{C}}{C}.} - \item{\code{G} and \code{H}}{Default="identity". A text string, "identity". Can be specified as a numeric matrix or array for time-varying cases. Must have m rows and g columns (\eqn{\mathbf{G}}{G}) or n rows and h columns (\eqn{\mathbf{H}}{H}). g is the dim of \eqn{\mathbf{Q}}{Q} and h is the dim of \eqn{\mathbf{R}}{R}.} - \item{\code{tinitx}}{Default=0. Whether the initial state is specified at t=0 (default) or t=1.} - } -All parameters except \eqn{\mathbf{x}_0}{x0} and \eqn{\mathbf{V}_0}{V0} may be time-varying. If time-varying, then text shortcuts cannot be used. Enter as an array with the 3rd dimension being time. Time dimension must be 1 or equal to the number of time-steps in the data. See Quick Start guide (\code{RShowDoc("Quick_Start",package="MARSS")}) or the User Guide (\code{RShowDoc("UserGuide",package="MARSS")}) for examples. Valid model structures for \code{method="BFGS"} are the same as for \code{method="kem"}. See \code{\link{MARSSoptim}()} for the allowed options for this method. - - The default estimation method, \code{method="kem"}, is the EM algorithm described in the MARSS User Guide. The default settings for the control and inits arguments are set via \code{MARSS:::alldefaults$kem} in \code{MARSSsettings.R}.
The defaults for the model argument are set in \code{MARSS_marxss.R}. For this method, they are: -\itemize{ - \item{inits = list(B=1, U=0, Q=0.05, Z=1, A=0, R=0.05, x0=-99, V0=0.05, G=0, H=0, L=0, C=0, D=0, c=0, d=0)} - \item{model = list(Z="identity", A="scaling", R="diagonal and equal", B="identity", U="unconstrained", Q="diagonal and unequal", x0="unconstrained", V0="zero", C="zero",D="zero",c=matrix(0,0,1), d=matrix(0,0,1), tinitx=0, diffuse=FALSE)} - \item{control=list(minit=15, maxit=500, abstol=0.001, trace=0, sparse=FALSE, - safe=FALSE, allow.degen=TRUE, min.degen.iter=50, degen.lim=1.0e-04, - min.iter.conv.test=15, conv.test.deltaT=9, conv.test.slope.tol= 0.5, demean.states=FALSE). You can read about these in \code{\link{MARSS}()}. If you want to speed up your fits, you can turn off most of the model checking using \code{trace=-1}. } - \item{fun.kf = "MARSSkfas"; This sets the Kalman filter function to use. \code{MARSSkfas()} is generally more stable as it uses Durbin & Koopman's algorithm. But it may dramatically slow down when the data set is large (more than 10 rows of data). Try the classic Kalman filter algorithm to see if it runs faster by setting \code{fun.kf="MARSSkfss"}. You can read about the two algorithms in \code{\link{MARSSkf}}.} - } -For \code{method="BFGS"}, type \code{MARSS:::alldefaults$BFGS} to see the defaults. - - -} -\value{ -An object of class \code{\link{marssMLE}}. See \code{\link{print.marssMLE}} for a discussion of the various output available for \code{\link{marssMLE}} objects (coefficients, residuals, Kalman filter and smoother output, imputed values for missing data, etc.). See \code{\link{MARSSsimulate}} for simulating from \code{\link{marssMLE}} objects, \code{\link{MARSSboot}} for bootstrapping, \code{\link{MARSSaic}} for calculation of various AIC related model selection metrics, and \code{\link{MARSSparamCIs}} for calculation of confidence intervals and bias.
See \code{\link{plot.marssMLE}} for some default plots of a model fit. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. - - } -\seealso{ \code{\link{marssMODEL}}, \code{\link{MARSS.dfa}()} } -\examples{ -\dontrun{ -#See the MARSS man page for examples -?MARSS - -#and the Quick Examples chapter in the User Guide -RShowDoc("UserGuide",package="MARSS") -} -} diff --git a/man/MARSSaic.Rd b/man/MARSSaic.Rd deleted file mode 100644 index 4e8ad80..0000000 --- a/man/MARSSaic.Rd +++ /dev/null @@ -1,54 +0,0 @@ -\name{MARSSaic} -\alias{MARSSaic} -\title{ AIC for MARSS Models } -\description{ - Calculates AIC, AICc, a parametric bootstrap AIC (AICbp) and a non-parametric bootstrap AIC (AICbb). If you simply want the AIC value for a \code{\link{marssMLE}} object, you can use \code{AIC(fit)}. -} -\usage{ -MARSSaic(MLEobj, output = c("AIC", "AICc"), - Options = list(nboot = 1000, return.logL.star = FALSE, - silent = FALSE)) -} -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. This object must have a \code{$par} element containing MLE parameter estimates from e.g. \code{MARSSkem()}. } - \item{output}{ A vector containing one or more of the following: "AIC", "AICc", "AICbp", "AICbb", "AICi", "boot.params". See Details.} - \item{Options}{ A list containing: - \itemize{ - \item \code{nboot} Number of bootstraps (positive integer) - \item \code{return.logL.star} Return the log-likelihoods for each bootstrap? (T/F) - \item \code{silent} Suppress printing of the progress bar during AIC bootstraps? (T/F) - } - } -} -\details{ - When sample size is small, Akaike's Information Criterion (AIC) under-penalizes more complex models. The most commonly used small sample size corrector is AICc, which uses a penalty term of \eqn{K n/(n-K-1)}, where \eqn{K} is the number of estimated parameters. However, for time series models, AICc still under-penalizes complex models; this is especially true for MARSS models. 
- -Two small-sample estimators specific for MARSS models have been developed. Cavanaugh and Shumway (1997) developed a variant of bootstrapped AIC using Stoffer and Wall's (1991) bootstrap algorithm ("AICbb"). Holmes and Ward (2010) developed a variant on AICb ("AICbp") using a parametric bootstrap. The parametric bootstrap permits AICb calculation when there are missing values in the data, which Cavanaugh and Shumway's algorithm does not allow. More recently, Bengtsson and Cavanaugh (2006) developed another small-sample AIC estimator, AICi, based on fitting candidate models to multivariate white noise. - -When the \code{output} argument passed in includes both \code{"AICbp"} and \code{"boot.params"}, the bootstrapped parameters from \code{"AICbp"} will be added to \code{MLEobj}. -} -\value{ - Returns the \code{\link{marssMLE}} object that was passed in with additional AIC components added on top as specified in the 'output' argument. -} -\references{ -Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. NOAA Fisheries, Northwest Fisheries Science -Center, 2725 Montlake Blvd E., Seattle, WA 98112 Type \code{RShowDoc("UserGuide",package="MARSS")} to open a copy. - -Bengtsson, T., and J. E. Cavanaugh. 2006. An improved Akaike information criterion for state-space model selection. Computational Statistics & Data Analysis 50:2635-2654. - -Cavanaugh, J. E., and R. H. Shumway. 1997. A bootstrap variant of AIC for state-space model selection. Statistica Sinica 7:473-496. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. 
-} -\seealso{ \code{\link{MARSSboot}()} } -\examples{ -dat <- t(harborSealWA) -dat <- dat[2:3, ] -kem <- MARSS(dat, model = list( - Z = matrix(1, 2, 1), - R = "diagonal and equal" -)) -kemAIC <- MARSSaic(kem, output = c("AIC", "AICc")) -} - diff --git a/man/MARSSapplynames.Rd b/man/MARSSapplynames.Rd deleted file mode 100644 index 6599329..0000000 --- a/man/MARSSapplynames.Rd +++ /dev/null @@ -1,26 +0,0 @@ -\name{MARSSapplynames} -\alias{MARSSapplynames} -\keyword{internal} - -\title{ Names for marssMLE Object Components } -\description{ -Puts names on the par, start, par.se, init components of \code{\link{marssMLE}} objects. This is a utility function in the \code{\link{MARSS-package}} and is not exported. -} -\usage{ -MARSSapplynames(MLEobj) -} -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. } -} -\details{ -The X.names and Y.names are attributes of \code{\link{marssMODEL}} objects (which would be in \code{$marss} and \code{$model} in the \code{\link{marssMLE}} object). These names are applied to the par elements in the \code{\link{marssMLE}} object. -} -\value{ - The object passed in, with row and column names on matrices as specified. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. -} -\seealso{ \code{\link{marssMLE}}, \code{\link{marssMODEL}} } - - diff --git a/man/MARSSboot.Rd b/man/MARSSboot.Rd deleted file mode 100644 index f975fc2..0000000 --- a/man/MARSSboot.Rd +++ /dev/null @@ -1,76 +0,0 @@ -\name{MARSSboot} -\alias{MARSSboot} -\title{ Bootstrap MARSS Parameter Estimates } -\description{ -Creates bootstrap parameter estimates and simulated (or bootstrapped) data (if appropriate). This is a base function in the \code{\link{MARSS-package}}. -} -\usage{ -MARSSboot(MLEobj, nboot = 1000, - output = "parameters", sim = "parametric", - param.gen = "MLE", control = NULL, silent = FALSE) -} - -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. Must have a \code{$par} element containing MLE parameter estimates. 
} - \item{nboot}{ Number of bootstraps to perform. } - \item{output}{ Output to be returned: "data", "parameters" or "all".} - \item{sim}{ Type of bootstrap: "parametric" or "innovations". See Details. } - \item{param.gen}{ Parameter generation method: "hessian" or "MLE". } - \item{control}{ The options in \code{MLEobj$control} are used by default. If supplied here, must contain all of the following: - \describe{ - \item{\code{max.iter}}{ Maximum number of EM iterations. } - \item{\code{tol}}{ Optional tolerance for log-likelihood change. If log-likelihood decreases less than this amount relative to the previous iteration, the EM algorithm exits. } - \item{\code{allow.degen}}{ Whether to try setting \eqn{\mathbf{Q}}{Q} or \eqn{\mathbf{R}}{R} elements to zero if they appear to be going to zero. } - } - } - \item{silent}{ Suppresses printing of progress bar. } -} - -\details{ -Approximate confidence intervals (CIs) on the model parameters can be calculated by the observed Fisher Information matrix (the Hessian of the negative log-likelihood function). The Hessian CIs (\code{param.gen="hessian"}) are based on the asymptotic normality of ML estimates under a large-sample approximation. CIs that are not based on asymptotic theory can be calculated using parametric and non-parametric bootstrapping (\code{param.gen="MLE"}). In this case, parameter estimates are generated by the ML estimates from each bootstrapped data set. The MLE method (kem or BFGS) is determined by \code{MLEobj$method}. - -Stoffer and Wall (1991) present an algorithm for generating CIs via a non-parametric bootstrap for state-space models (\code{sim = "innovations"}). The basic idea is that the Kalman filter can be used to generate estimates of the residuals of the model fit. These residuals are then standardized and resampled and used to generate bootstrapped data using the MARSS model and its maximum-likelihood parameter estimates. 
One of the limitations of the Stoffer and Wall algorithm is that it cannot be used when there are missing data, unless all data at time \eqn{t} are missing. An alternative approach is a parametric bootstrap (\code{sim = "parametric"}), in which the ML parameter estimates are used to produce bootstrapped data directly from the state-space model. -} -\value{ - A list with the following components: - \item{boot.params}{ Matrix (number of params x nboot) of parameter estimates from the bootstrap. } - \item{boot.data}{ Array (n x t x nboot) of simulated (or bootstrapped) data (if requested and appropriate). } - \item{marss}{ The \code{\link{marssMODEL}} object (form="marss") that was passed in via \code{MLEobj$marss}. } - \item{nboot}{ Number of bootstraps performed. } - \item{output}{ Type of output returned.} - \item{sim}{ Type of bootstrap. } - \item{param.gen}{ Parameter generation method: "hessian" or "MLE". } -} -\references{ -Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. NOAA Fisheries, Northwest Fisheries Science -Center, 2725 Montlake Blvd E., Seattle, WA 98112 Type \code{RShowDoc("UserGuide",package="MARSS")} to open a copy. - -Stoffer, D. S., and K. D. Wall. 1991. Bootstrapping state-space models: Gaussian maximum likelihood estimation and the Kalman filter. Journal of the American Statistical Association 86:1024-1033. - -Cavanaugh, J. E., and R. H. Shumway. 1997. A bootstrap variant of AIC for state-space model selection. Statistica Sinica 7:473-496. - } -\author{ - Eli Holmes and Eric Ward, NOAA, Seattle, USA. 
-} -\seealso{ - \code{\link{marssMLE}}, \code{\link{marssMODEL}}, \code{\link{MARSSaic}()}, \code{\link{MARSShessian}()}, \code{\link{MARSSFisherI}()} -} -\examples{ -# nboot is set low in these examples in order to run quickly -# normally nboot would be >1000 at least -dat <- t(kestrel) -dat <- dat[2:3, ] -# maxit set low to speed up the example -kem <- MARSS(dat, - model = list(U = "equal", Q = diag(.01, 2)), - control = list(maxit = 50) -) -# bootstrap parameters from a Hessian matrix -hess.list <- MARSSboot(kem, param.gen = "hessian", nboot = 4) - -# from resampling the innovations (no missing values allowed) -boot.innov.list <- MARSSboot(kem, output = "all", sim = "innovations", nboot = 4) - -# bootstrapped parameter estimates -hess.list$boot.params -} diff --git a/man/MARSSharveyobsFI.Rd b/man/MARSSharveyobsFI.Rd deleted file mode 100644 index 22b78b1..0000000 --- a/man/MARSSharveyobsFI.Rd +++ /dev/null @@ -1,42 +0,0 @@ -\name{MARSSharveyobsFI} -\alias{MARSSharveyobsFI} -\title{ Hessian Matrix via the Harvey (1989) Recursion} -\description{ -Calculates the observed Fisher Information analytically via the recursion by Harvey (1989) as adapted by Holmes (2017) for MARSS models with linear constraints. This is the same as the Hessian of the negative log-likelihood function at the MLEs. This is a utility function in the \code{\link{MARSS-package}} and is not exported. Use \code{\link{MARSShessian}()} to access. -} -\usage{ -MARSSharveyobsFI(MLEobj) -} -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. - This object must have a \code{$par} element containing MLE parameter estimates from e.g. \code{\link{MARSSkem}}. } - } -\value{ - The observed Fisher Information matrix computed via equation 3.4.69 in Harvey (1989). The differentials in the equation are computed in the recursion in equations 3.4.73a to 3.4.74b. 
See Holmes (2016c) for a discussion of the Harvey (1989) algorithm and Holmes (2017) for the specific implementation of the algorithm for MARSS models with linear constraints. - -Harvey (1989) discusses missing observations in section 3.4.7. However, the \code{MARSSharveyobsFI()} function implements the approach of Shumway and Stoffer (2006) in section 6.4 for the missing values. See Holmes (2012) for a full discussion of the missing values modifications. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. -} -\seealso{ \code{\link{MARSShessian}()}, \code{\link{MARSSparamCIs}()} } -\examples{ -dat <- t(harborSeal) -dat <- dat[c(2, 11), ] -fit <- MARSS(dat) -MARSS:::MARSSharveyobsFI(fit) -} -\references{ -R. H. Shumway and D. S. Stoffer (2006). Section 6.4 (Missing Data Modifications) in Time series analysis and its applications. Springer-Verlag, New York. - -Harvey, A. C. (1989) Section 3.4.5 (Information matrix) in Forecasting, structural time series models and the Kalman filter. Cambridge University Press, Cambridge, UK. - -See also J. E. Cavanaugh and R. H. Shumway (1996) On computing the expected Fisher information matrix for state-space model parameters. Statistics & Probability Letters 26: 347-355. This paper discusses the Harvey (1989) recursion (and proposes an alternative). - -Holmes, E. E. (2012). Derivation of the EM algorithm for constrained and unconstrained multivariate autoregressive state-space (MARSS) models. Technical Report. arXiv:1302.3919 [stat.ME] - -Holmes, E. E. 2016c. Notes on computing the Fisher Information matrix for MARSS models. Part III Overview of Harvey 1989. https://eeholmes.github.io/posts/2016-6-16-FI-recursion-3/ - -Holmes, E. E. 2017. Notes on computing the Fisher Information matrix for MARSS models. Part IV Implementing the Recursion in Harvey 1989. 
https://eeholmes.github.io/posts/2017-5-31-FI-recursion-4/ - -} \ No newline at end of file diff --git a/man/MARSShatyt.Rd b/man/MARSShatyt.Rd deleted file mode 100644 index f8d3095..0000000 --- a/man/MARSShatyt.Rd +++ /dev/null @@ -1,52 +0,0 @@ -\name{MARSShatyt} -\alias{MARSShatyt} -\title{ Compute Expected Value of Y, YY, and YX} -\description{ -Computes the expected value of random variables involving \eqn{\mathbf{Y}}{Y}. Users can use \code{\link[=tsSmooth.marssMLE]{tsSmooth}()} or \code{print( MLEobj, what="Ey")} to access this output. See \code{\link{print.marssMLE}}. -} -\usage{ -MARSShatyt(MLEobj, only.kem = TRUE) -} -\arguments{ - \item{ MLEobj }{ A \code{\link{marssMLE}} object with the \code{par} element of estimated parameters, \code{model} element with the model description and data. } - \item{ only.kem }{ If TRUE, return only \code{ytT}, \code{OtT}, \code{yxtT}, and \code{yxttpT} (values conditioned on the data from \eqn{1:T}) needed for the EM algorithm. If \code{only.kem=FALSE}, then also return values conditioned on data from 1 to \eqn{t-1} (\code{Ott1} and \code{ytt1}) and 1 to \eqn{t} (\code{Ott} and \code{ytt}), \code{yxtt1T} (\eqn{\textrm{var}[\mathbf{Y}_t, \mathbf{X}_{t-1}|\mathbf{y}_{1:T}]}{var[Y(t),X(t-1)|1:T]}), var.ytT (\eqn{\textrm{var}[\mathbf{Y}_t|\mathbf{y}_{1:T}]}{var[Y(t)|1:T]}), and var.EytT (\eqn{\textrm{var}_X[E_{Y|x}[\mathbf{Y}_t|\mathbf{y}_{1:T},\mathbf{x}_t]]}{var_X[E_{Y|x}[Y(t)|1:T,x(t)]]}).} - } -\details{ -For state space models, \code{MARSShatyt()} computes the expectations involving \eqn{\mathbf{Y}}{Y}. If \eqn{\mathbf{Y}}{Y} is completely observed, this entails simply replacing \eqn{\mathbf{Y}}{Y} with the observed \eqn{\mathbf{y}}{y}. When \eqn{\mathbf{Y}}{Y} is only partially observed, the expectation involves the conditional expectation of a multivariate normal. - -} -\value{ -A list with the following components (n is the number of state processes). 
Following the notation in Holmes (2012), \eqn{\mathbf{y}(1)}{y(1)} is the observed data (for \eqn{t=1:T}) while \eqn{\mathbf{y}(2)}{y(2)} is the unobserved data. \eqn{\mathbf{y}(1,1:t-1)}{y(1,1:t-1)} is the observed data from time 1 to \eqn{t-1}. - \item{ytT}{ E[Y(t) | Y(1,1:T)=y(1,1:T)] (n x T matrix). } - \item{ytt1}{ E[Y(t) | Y(1,1:t-1)=y(1,1:t-1)] (n x T matrix). } - \item{ytt}{ E[Y(t) | Y(1,1:t)=y(1,1:t)] (n x T matrix). } - \item{OtT}{ E[Y(t) t(Y(t)) | Y(1,1:T)=y(1,1:T)] (n x n x T array). } - \item{var.ytT}{ var[Y(t) | Y(1,1:T)=y(1,1:T)] (n x n x T array). } - \item{var.EytT}{ var_X[E_Y[Y(t) | Y(1,1:T)=y(1,1:T), X(t)=x(t)]] (n x n x T array). } - \item{Ott1}{ E[Y(t) t(Y(t)) | Y(1,1:t-1)=y(1,1:t-1)] (n x n x T array). } - \item{var.ytt1}{ var[Y(t) | Y(1,1:t-1)=y(1,1:t-1)] (n x n x T array). } - \item{var.Eytt1}{ var_X[E_Y[Y(t) | Y(1,1:t-1)=y(1,1:t-1), X(t)=x(t)]] (n x n x T array). } - \item{Ott}{ E[Y(t) t(Y(t)) | Y(1,1:t)=y(1,1:t)] (n x n x T array). } - \item{yxtT}{ E[Y(t) t(X(t)) | Y(1,1:T)=y(1,1:T)] (n x m x T array). } - \item{yxtt1T}{ E[Y(t) t(X(t-1)) | Y(1,1:T)=y(1,1:T)] (n x m x T array). } - \item{yxttpT}{ E[Y(t) t(X(t+1)) | Y(1,1:T)=y(1,1:T)] (n x m x T array). } - \item{errors}{ Any error messages due to ill-conditioned matrices. } - \item{ok}{ (TRUE/FALSE) Whether errors were generated. } -} -\references{ -Holmes, E. E. (2012) Derivation of the EM algorithm for constrained and unconstrained multivariate autoregressive state-space (MARSS) models. Technical report. arXiv:1302.3919 [stat.ME] Type \code{RShowDoc("EMDerivation",package="MARSS")} to open a copy. See the section on 'Computing the expectations in the update equations' and the subsections on expectations involving Y. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. 
-} -\seealso{ -\code{\link{MARSS}()}, \code{\link{marssMODEL}}, \code{\link{MARSSkem}()} -} -\examples{ -dat <- t(harborSeal) -dat <- dat[2:3, ] -fit <- MARSS(dat) -EyList <- MARSShatyt(fit) -} - - diff --git a/man/MARSShessian.Rd b/man/MARSShessian.Rd deleted file mode 100644 index ad729bb..0000000 --- a/man/MARSShessian.Rd +++ /dev/null @@ -1,53 +0,0 @@ -\name{MARSShessian} -\alias{MARSShessian} - -\title{ Parameter Variance-Covariance Matrix from the Hessian Matrix} -\description{ -Calculates an approximate parameter variance-covariance matrix for the parameters using an inverse of the Hessian of the negative log-likelihood function at the MLEs (the observed Fisher Information matrix). It appends \code{$Hessian}, \code{$parMean}, \code{$parSigma} to the \code{\link{marssMLE}} object. -} -\usage{ -MARSShessian(MLEobj, method=c("Harvey1989", "fdHess", "optim")) -} -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. - This object must have a \code{$par} element containing MLE parameter estimates from e.g. \code{\link{MARSSkem}}. } - \item{method}{ The method to use for computing the Hessian. Options are \code{Harvey1989} to use the Harvey (1989) recursion, which is an analytical solution, \code{fdHess} or \code{optim} which are two numerical methods. Although \code{optim} can be passed to this function, in the internal functions which call this function, \code{fdHess} will be used if a numerical estimate is requested.} - } -\details{ -See \code{\link{MARSSFisherI}} for a discussion of the observed Fisher Information matrix and references. - - Method \code{fdHess} uses \code{\link[nlme]{fdHess}} from package nlme to numerically estimate the Hessian matrix (the matrix of partial 2nd derivatives of the negative log-likelihood function at the MLE). Method \code{optim} uses \code{\link{optim}} with \code{hessian=TRUE} and \code{list(maxit=0)} to ensure that the Hessian is computed at the values in the \code{par} element of the MLE object. 
Method \code{Harvey1989} (the default) uses the recursion in Harvey (1989) to compute the observed Fisher Information of a MARSS model analytically. - - Note that the parameter confidence intervals computed with the observed Fisher Information matrix are based on the asymptotic normality of maximum-likelihood estimates under a large-sample approximation. - -} -\value{ - \code{MARSShessian()} attaches - \code{Hessian}, \code{parMean} and \code{parSigma} to the \code{\link{marssMLE}} object that is passed into the function. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. -} -\seealso{ \code{\link{MARSSFisherI}()}, \code{\link{MARSSharveyobsFI}()}, \code{\link{MARSShessian.numerical}()}, \code{\link{MARSSparamCIs}()}, \code{\link{marssMLE}} } -\examples{ -dat <- t(harborSeal) -dat <- dat[c(2, 11), ] -MLEobj <- MARSS(dat) -MLEobj.hessian <- MARSShessian(MLEobj) - -# show the approx Hessian -MLEobj.hessian$Hessian - -# generate a parameter sample using the Hessian -# this uses the rmvnorm function in the mvtnorm package -hess.params <- mvtnorm::rmvnorm(1, - mean = MLEobj.hessian$parMean, - sigma = MLEobj.hessian$parSigma -) -} -\references{ -Harvey, A. C. (1989) Section 3.4.5 (Information matrix) in Forecasting, structural time series models and the Kalman filter. Cambridge University Press, Cambridge, UK. - -See also J. E. Cavanaugh and R. H. Shumway (1996) On computing the expected Fisher information matrix for state-space model parameters. Statistics & Probability Letters 26: 347-355. This paper discusses the Harvey (1989) recursion (and proposes an alternative). 
-} - diff --git a/man/MARSShessian_numerical.Rd b/man/MARSShessian_numerical.Rd deleted file mode 100644 index c29e943..0000000 --- a/man/MARSShessian_numerical.Rd +++ /dev/null @@ -1,31 +0,0 @@ -\name{MARSShessian.numerical} -\alias{MARSShessian.numerical} -\title{ Hessian Matrix via Numerical Approximation} -\description{ -Calculates the Hessian of the log-likelihood function at the MLEs using either the \code{\link[nlme]{fdHess}} function in the nlme package or the \code{\link{optim}} function. This is a utility function in the \code{\link{MARSS-package}} and is not exported. Use \code{\link{MARSShessian}} to access. -} -\usage{ -MARSShessian.numerical(MLEobj, fun=c("fdHess", "optim")) -} -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. - This object must have a \code{$par} element containing MLE parameter estimates from e.g. \code{\link{MARSSkem}}. } - \item{fun}{ The function to use for computing the Hessian. Options are 'fdHess' or 'optim'.} - } -\details{ - Method \code{fdHess} uses \code{\link[nlme]{fdHess}} from package nlme to numerically estimate the Hessian matrix (the matrix of partial 2nd derivatives) of the negative log-likelihood function with respect to the parameters. Method \code{optim} uses \code{\link{optim}} with \code{hessian=TRUE} and \code{list(maxit=0)} to ensure that the Hessian is computed at the values in the \code{par} element of the MLE object. -} -\value{ - The numerically estimated Hessian of the log-likelihood function at the maximum likelihood estimates. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. 
-} -\seealso{ \code{\link{MARSSharveyobsFI}()}, \code{\link{MARSShessian}()}, \code{\link{MARSSparamCIs}()} } -\examples{ -dat <- t(harborSeal) -dat <- dat[c(2, 11), ] -MLEobj <- MARSS(dat) -MARSS:::MARSShessian.numerical(MLEobj) -} - diff --git a/man/MARSSinfo.Rd b/man/MARSSinfo.Rd deleted file mode 100644 index 42f984c..0000000 --- a/man/MARSSinfo.Rd +++ /dev/null @@ -1,23 +0,0 @@ -\name{MARSSinfo} -\alias{MARSSinfo} -\title{ MARSS Error Messages and Warnings } -\description{ - Prints out more information for MARSS error messages and warnings. -} -\usage{ -MARSSinfo(number) -} -\arguments{ - \item{number}{ An error or warning message number. } -} - -\value{ - A print out of information. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. -} -\examples{ -# Show all the info options -MARSSinfo() -} diff --git a/man/MARSSinits.Rd b/man/MARSSinits.Rd deleted file mode 100644 index 39ee4a6..0000000 --- a/man/MARSSinits.Rd +++ /dev/null @@ -1,41 +0,0 @@ -\name{MARSSinits} -\alias{MARSSinits} -\keyword{helper} -\title{ Initial Values for MLE } -\description{ - Sets up generic starting values for parameters for maximum-likelihood estimation algorithms that use an iterative maximization routine needing starting values. Examples of such algorithms are the EM algorithm in \code{\link{MARSSkem}()} and Newton methods in \code{\link{MARSSoptim}()}. This is a utility function in the \code{\link{MARSS-package}}. It is not exported to the user. Users looking for information on specifying initial conditions should look at the help file for \code{\link{MARSS}()} and the User Guide section on initial conditions. - -The function assumes that the user passed in the inits list using the parameter names in whatever form was specified in the \code{\link{MARSS}()} call. The default is form="marxss". The \code{\link{MARSSinits}()} function calls MARSSinits_foo, where foo is the form specified in the \code{\link{MARSS}()} call. 
MARSSinits_foo translates the inits list in form foo into form marss. -} -\usage{ -MARSSinits(MLEobj, inits=list(B=1, U=0, Q=0.05, Z=1, A=0, - R=0.05, x0=-99, V0=5, G=0, H=0, L=0)) -} -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. } - \item{inits}{ A list of column vectors (matrices with one column) of the estimated values in each parameter matrix.} -} -\details{ - Creates an \code{inits} parameter list for use by iterative maximization algorithms. - - Default values for \code{inits} are supplied in \code{MARSSsettings.R}. The user can alter these and supply any of the following (m is the dim of X and n is the dim of Y in the MARSS model): -\itemize{ - \item{elem=\code{A,U}} A numeric vector or matrix which will be constructed into \code{inits$elem} by the command \code{array(inits$elem,dim=c(n or m,1))}. If elem is fixed in the model, any \code{inits$elem} values will be overridden and replaced with the fixed value. Default is \code{array(0,dim=c(n or m,1))}. - \item{elem=\code{Q,R,B}} A numeric vector or matrix. If length equals the length of \code{MODELobj$fixed$elem} then \code{inits$elem} will be constructed by \code{array(inits$elem,dim=dim(MODELobj$fixed$elem))}. If length is 1 or equals dim of \code{Q} or dim of \code{R} then \code{inits$elem} will be constructed into a diagonal matrix by the command \code{diag(inits$elem)}. If elem is fixed in the model, any \code{inits$elem} values will be overridden and replaced with the fixed value. Default is \code{diag(0.05, dim of Q or R)} for \code{Q} and \code{R}. Default is \code{diag(1,m)} for \code{B}. - \item{\code{x0}} If \code{inits$x0=-99}, then starting values for \code{x0} are estimated by a linear regression - through the count data assuming \code{A} is all zero. This will be a poor start if \code{inits$A} is not 0. If \code{inits$x0} is a numeric vector or matrix, \code{inits$x0} will be constructed by the command \code{array(inits$x0,dim=c(m,1))}. 
If \code{x0} is fixed in the model, any \code{inits$x0} values will be overridden and replaced with the fixed value. Default is \code{inits$x0=-99}. - \item{\code{Z}} If \code{Z} is fixed in the model, \code{inits$Z} set to the fixed value. If \code{Z} is not fixed, then the user must supply \code{inits$Z}. There is no default. - \item{elem=\code{V0}} \code{V0} is never estimated, so this is never used. -} -} -\value{ - A list with initial values for the estimated values for each parameter matrix in a MARSS model in marss form. So this will be a list with elements \code{B}, \code{U}, \code{Q}, \code{Z}, \code{A}, \code{R}, \code{x0}, \code{V0}, \code{G}, \code{H}, \code{L}. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. -} -\seealso{ \code{\link{marssMODEL}}, \code{\link{MARSSkem}()}, \code{\link{MARSSoptim}()} } - -\note{ -Within the base code, a form-specific internal \code{MARSSinits} function is called to allow the output to vary based on form: \code{MARSSinits_dfa}, \code{MARSSinits_marss}, \code{MARSSinits_marxss}. -} diff --git a/man/MARSSkem.Rd b/man/MARSSkem.Rd deleted file mode 100644 index 77f4a48..0000000 --- a/man/MARSSkem.Rd +++ /dev/null @@ -1,99 +0,0 @@ -\name{MARSSkem} -\alias{MARSSkem} - -\title{ EM Algorithm function for MARSS models } -\description{ - \code{MARSSkem()} performs maximum-likelihood estimation, using an EM algorithm for constrained and unconstrained MARSS models. Users would not call this function directly normally. The function \code{\link{MARSS}()} calls \code{MARSSkem()}. However users might want to use \code{MARSSkem()} directly if they need to avoid some of the error-checking overhead associated with the \code{\link{MARSS}()} function. -} -\usage{ -MARSSkem(MLEobj) -} -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. } -} -\details{ -Objects of class \code{\link{marssMLE}} may be built from scratch but are easier to construct using \code{\link{MARSS}()} with \code{MARSS(..., fit=FALSE)}. 
- - Options for \code{MARSSkem()} may be set using \code{MLEobj$control}. The commonly used elements of \code{control} are as follows (see \code{\link{marssMLE}}): - \describe{ - \item{\code{minit}}{ Minimum number of EM iterations. You can use this to force the algorithm to do a certain number of iterations. This is helpful if your solution is not converging.} - \item{\code{maxit}}{ Maximum number of EM iterations. } - \item{\code{min.iter.conv.test}}{ The minimum number of iterations before the log-log convergence test will be computed. If \code{maxit} is set less than this, then convergence will not be computed (and the algorithm will just run for maxit iterations). } - \item{\code{kf.x0}}{ Whether to set the prior at \eqn{t=0} (\code{"x00"}) or at \eqn{t=1} (\code{"x10"}). The default is \code{"x00"}. } - \item{\code{conv.test.deltaT}}{ The number of iterations to use in the log-log convergence test. This defaults to 9. } - \item{\code{abstol}}{ Tolerance for log-likelihood change for the delta logLik convergence test. If log-likelihood changes less than this amount relative to the previous iteration, the EM algorithm exits. This is normally (default) set to NULL and the log-log convergence test is used instead.} - \item{\code{allow.degen}}{ Whether to try setting \eqn{\mathbf{Q}}{Q} or \eqn{\mathbf{R}}{R} elements to zero if they appear to be going to zero. } - \item{\code{trace}}{ A positive integer. If not 0, a record will be created of each variable over all EM iterations and detailed warning messages (if appropriate) will be printed. } - \item{\code{safe}}{ If TRUE, \code{MARSSkem} will rerun \code{\link{MARSSkf}} after each individual parameter update rather than only after all parameters are updated. This safer option is slower and unnecessary for many models, but in some cases, the safer and slower algorithm is needed because the ML parameter matrices have high condition numbers. 
} - \item{\code{silent}}{ Suppresses printing of progress bars, error messages, warnings and convergence information. } - } -} -\value{ - The \code{\link{marssMLE}} object which was passed in, with additional components: - \item{method}{String "kem".} - \item{kf}{Kalman filter output. } - \item{iter.record}{If \code{MLEobj$control$trace = TRUE}, a list with \code{par} = a record of each estimated parameter over all EM iterations and \code{logLik} = a record of the log likelihood at each iteration. } - \item{numIter}{Number of iterations needed for convergence.} - \item{convergence}{ Did estimation converge successfully? - \describe{ - \item{convergence=0}{ Converged in both the abstol test and the log-log plot test. } - \item{convergence=1}{Some of the parameter estimates did not converge (based on the log-log plot test AND abstol tests) before \code{MLEobj$control$maxit} was reached. This is not an error per se.} - \item{convergence=3}{ No convergence diagnostics were computed because all parameters were fixed thus no fitting required. } - \item{convergence=-1}{ No convergence diagnostics were computed because the MLE object was not fit (called with fit=FALSE). This isn't a convergence error just information. There is no par element so no functions can be run with the object. } - \item{convergence=2}{No convergence diagnostics were computed because the MLE object had problems and was not fit. This isn't a convergence error just information.} - \item{convergence=10}{Abstol convergence only. Some of the parameter estimates did not converge (based on the log-log plot test) before \code{MLEobj$control$maxit} was reached. However \code{MLEobj$control$abstol} was reached. } - \item{convergence=11}{Log-log convergence only. Some of the parameter estimates did not converge (based on the abstol test) before \code{MLEobj$control$maxit} was reached. However the log-log convergence test was passed. } - \item{convergence=12}{Abstol convergence only. 
Log-log convergence test was not computed because \code{MLEobj$control$maxit} was set to less than \code{control$min.iter.conv.test}. } - \item{convergence=13}{Lack of convergence info. Parameter estimates did not converge based on the abstol test before \code{MLEobj$control$maxit} was reached. No log-log information since \code{MLEobj$control$maxit} is less than \code{control$min.iter.conv.test} so no log-log plot test could be done. } - \item{convergence=42}{\code{MLEobj$control$abstol} was reached but the log-log plot test returned NAs. This is an odd error and you should set \code{control$trace=TRUE} and look at the outputted \code{$iter.record} to see what is wrong. } - \item{convergence=52}{The EM algorithm was abandoned due to numerical errors. Usually this means one of the variances either went to zero or to all elements being equal. This is not an error per se. Most likely it means that your model is not very good for your data (too inflexible or too many parameters). Try setting \code{control$trace=1} to view a detailed error report.} - \item{convergence=53}{ The algorithm was abandoned due to numerical errors in the likelihood calculation from \code{\link{MARSSkf}}. } - \item{convergence=62}{The algorithm was abandoned due to errors in the log-log convergence test. You should not get this error (it is included for debugging purposes to catch improper arguments passed into the log-log convergence test). } - \item{convergence=63}{The algorithm was run for \code{control$maxit} iterations, \code{control$abstol} not reached, and the log-log convergence test returned errors. You should not get this error (it is included for debugging purposes to catch improper arguments passed into the log-log convergence test).} - \item{convergence=72}{Other convergence errors. This is included for debugging purposes to catch misc. errors. 
} - } - } - \item{logLik}{Log-likelihood.} - \item{states}{State estimates from the Kalman smoother.} - \item{states.se}{Confidence intervals based on state standard errors, see caption of Fig 6.3 (p. 337) in Shumway & Stoffer (2006).} - \item{errors}{Any error messages.} -} - -\section{Discussion}{ - To ensure that the global maximum-likelihood values are found, it is recommended that you test the fit under different initial parameter values, particularly if the model is not a good fit to the data. This requires more computation time, but reduces the chance of the algorithm terminating at a local maximum and not reaching the true MLEs. For many models and for draft analyses, this is unnecessary, but answers should be checked using an initial conditions search before reporting final values. See the chapter on initial conditions in the User Guide for a discussion on how to do this. - - \code{MARSSkem()} calls a Kalman filter/smoother \code{\link{MARSSkf}()} for hidden state estimation. The algorithm allows two options for the initial state conditions: fixed but unknown or a prior. In the first case, x0 (whether at t=0 or t=1) is treated as fixed but unknown (estimated); in this case, \code{fixed$V0=0} and x0 is estimated. This is the default behavior. In the second case, the initial conditions are specified with a prior and V0!=0. In the latter case, x0 or V0 may be estimated. MARSS will allow you to try to estimate both, but many researchers have noted that this is not robust so you should fix one or the other. - - If you get errors, you can type \code{\link{MARSSinfo}()} for help. Fitting problems often mean that the solution involves an ill-conditioned matrix. For example, your \eqn{\mathbf{Q}}{Q} or \eqn{\mathbf{R}}{R} matrix is going to a value in which all elements have the same value, for example zero. 
If for example, you tried to fit a model with a fixed \eqn{\mathbf{R}}{R} matrix with high values on the diagonal and the variance in that \eqn{\mathbf{R}}{R} matrix (diagonal terms) was much higher than what is actually in the data, then you might drive \eqn{\mathbf{Q}}{Q} to zero. Also if you try to fit a structurally inadequate model, then it is not unusual that \eqn{\mathbf{Q}}{Q} will be driven to zero. For example, if you fit a model with 1 hidden state trajectory to data that clearly have 2 quite different hidden state trajectories, you might have this problem. Comparing the likelihood of this model to a model with more structural flexibility should reveal that the structurally inflexible model is inadequate (much lower likelihood). - - Convergence testing is done via a combination of two tests. The first test (abstol test) is the test that the change in the absolute value of the log-likelihood from one iteration to another is less than some tolerance value (abstol). The second test (log-log test) is that the slope of a plot of the log of the parameter value or log-likelihood versus the log of the iteration number is less than some tolerance. Both of these must be met to generate the Success! parameters converged output. If you want to circumvent one of these tests, then set the tolerance for the unwanted test to be high. That will guarantee that that test is met before the convergence test you want to use is met. The tolerance for the abstol test is set by \code{control$abstol} and the tolerance for the log-log test is set by \code{control$conv.test.slope.tol}. Anything over 1 is huge for both of these. - } - -\references{ - R. H. Shumway and D. S. Stoffer (2006). Chapter 6 in Time series analysis and its applications. Springer-Verlag, New York. - - Ghahramani, Z. and Hinton, G. E. (1996) Parameter estimation for linear dynamical systems. Technical Report CRG-TR-96-2, University of Toronto, Dept. of Computer Science. - -Harvey, A. C. 
(1989) Chapter 5 in Forecasting, structural time series models and the -Kalman filter. Cambridge University Press, Cambridge, UK. - -The MARSS User Guide: Holmes, E. E., E. J. Ward, and M. D. Scheuerell (2012) Analysis of multivariate time-series using the MARSS package. NOAA Fisheries, Northwest Fisheries Science -Center, 2725 Montlake Blvd E., Seattle, WA 98112 Go to \href{https://cran.r-project.org/package=MARSS/vignettes/UserGuide.pdf}{User Guide} to open the most recent version. - -Holmes, E. E. (2012). Derivation of the EM algorithm for constrained and unconstrained multivariate autoregressive -state-space (MARSS) models. Technical Report. arXiv:1302.3919 [stat.ME] \href{https://cran.r-project.org/package=MARSS/vignettes/EMDerivation.pdf}{EMDerivation} has the most recent version. -} -\author{ - Eli Holmes and Eric Ward, NOAA, Seattle, USA. -} -\seealso{ - \code{\link{MARSSkf}()}, \code{\link{marssMLE}}, \code{\link{MARSSoptim}()}, \code{\link{MARSSinfo}()} - } -\examples{ -dat <- t(harborSeal) -dat <- dat[2:4, ] -# you can use MARSS to construct a proper marssMLE object. -fit <- MARSS(dat, model = list(Q = "diagonal and equal", U = "equal"), fit = FALSE) -# Pass this marssMLE object to MARSSkem to do the fit. -kemfit <- MARSSkem(fit) -} diff --git a/man/MARSSkemcheck.Rd b/man/MARSSkemcheck.Rd deleted file mode 100644 index a852698..0000000 --- a/man/MARSSkemcheck.Rd +++ /dev/null @@ -1,22 +0,0 @@ -\name{MARSSkemcheck} -\alias{MARSSkemcheck} -\keyword{internal} - -\title{ Model Checking for MLE objects Passed to MARSSkem } -\description{ - This is a helper function in the \code{\link{MARSS-package}} that checks that the model can be handled by the \code{\link{MARSSkem}} algorithm. It also returns the structure of the model as a list of text strings. -} -\usage{ -MARSSkemcheck(MLEobj) -} -\arguments{ - \item{ MLEobj }{ An object of class \code{\link{marssMLE}}. 
} -} -\value{ - A list of the model elements A, B, Q, R, U, x0, Z, V0 specifying the structure of the model using text strings. -} -\author{ - Eli Holmes, NOAA, Seattle, USA. -} -\seealso{ \code{\link{marssMODEL}}, \code{\link{MARSSkem}()} } - diff --git a/man/MARSSresiduals_tt1.Rd b/man/MARSSresiduals_tt1.Rd deleted file mode 100644 index e9ee95f..0000000 --- a/man/MARSSresiduals_tt1.Rd +++ /dev/null @@ -1,122 +0,0 @@ -\name{MARSSresiduals.tt1} -\alias{MARSSresiduals.tt1} - -\title{ MARSS One-Step-Ahead Residuals } -\description{ - Calculates the standardized (or auxiliary) one-step-ahead residuals, aka the innovations residuals and their variance. Not exported. Access this function with \code{MARSSresiduals(object, type="tt1")}. To get the residuals as a data frame in long-form, use \code{\link[=residuals.marssMLE]{residuals}(object, type="tt1")}. -} -\usage{ -MARSSresiduals.tt1(object, method = c("SS"), normalize = FALSE, - silent = FALSE, fun.kf = c("MARSSkfas", "MARSSkfss")) -} -\arguments{ - \item{object}{ An object of class \code{\link{marssMLE}}.} - \item{method}{ Algorithm to use. Currently only "SS". } - \item{normalize}{ TRUE/FALSE See details. } - \item{silent}{ If TRUE, don't print inversion warnings. } - \item{fun.kf}{ Can be ignored. This will change the Kalman filter/smoother function from the value in object$fun.kf if desired. } -} -\value{ -A list with the following components - \item{model.residuals}{ The observed one-step-ahead model residuals: data minus the model predictions conditioned on the data \eqn{t=1} to \eqn{t-1}. These are termed innovations. A n x T matrix. NAs will appear where the data are missing. } - \item{state.residuals}{ The one-step-ahead state residuals \eqn{ \mathbf{x}_{t+1}^{t+1} - \mathbf{B}\mathbf{x}_{t}^t - \mathbf{u} }{ xtt(t+1) - B xtt(t) - u} -. Note, state residual at time \eqn{t} is the transition from time \eqn{t=t} to \eqn{t+1}.
} - \item{residuals}{ The residuals conditioned on the observed data up to time \eqn{t-1}. Returned as a (n+m) x T matrix with \code{model.residuals} in rows 1 to n and \code{state.residuals} in rows n+1 to n+m. NAs will appear in rows 1 to n in the places where data are missing. } - \item{var.residuals}{ The joint variance of the one-step-ahead residuals. Returned as a n+m x n+m x T matrix. } - \item{std.residuals}{ The Cholesky standardized residuals as a n+m x T matrix. This is \code{residuals} multiplied by the inverse of the lower triangle of the Cholesky decomposition of \code{var.residuals}. The model standardized residuals associated with the missing data are replaced with NA. } - \item{mar.residuals}{ The marginal standardized residuals as a n+m x T matrix. This is \code{residuals} multiplied by the inverse of the diagonal matrix formed by the square-root of the diagonal of \code{var.residuals}. The model marginal residuals associated with the missing data are replaced with NA. } - \item{bchol.residuals}{ The Block Cholesky standardized residuals as a (n+m) x T matrix. This is \code{model.residuals} multiplied by the inverse of the lower triangle of the Cholesky decomposition of \code{var.residuals[1:n,1:n,]} and \code{state.residuals} multiplied by the inverse of the lower triangle of the Cholesky decomposition of \code{var.residuals[(n+1):(n+m),(n+1):(n+m),]}.} - \item{E.obs.residuals}{ The expected value of the model residuals conditioned on the observed data \eqn{t=1} to \eqn{t-1}. Returned as a n x T matrix. Because all the data at time \eqn{t} are unobserved for the purpose of estimation (since conditioning is from \eqn{t=1} to \eqn{t-1}), this will be all 0s (unlike the case where we condition on the data from \eqn{t=1} to \eqn{T} or to \eqn{t}). This and \code{var.obs.residuals} are included for completeness since they are returned for \code{\link{MARSSresiduals.tT}()}, but they are not relevant for one-step-ahead residuals. 
See the discussion there. } - \item{var.obs.residuals}{ For one-step-ahead residuals, this will be the same as the 1:n, 1:n upper diagonal block in \code{var.residuals} since none of the \eqn{t} data affect the residuals at time \eqn{t} (the model residuals are conditioned only on the data up to \eqn{t-1}). This is different for smoothation residuals which are conditioned on the data from \eqn{t=1} to \eqn{T}. This and \code{E.obs.residuals} are included for completeness since they are returned for \code{\link{MARSSresiduals.tT}()}, but they are not relevant for one-step-ahead residuals. See the discussion there. Note, also included as a code check. They are computed differently, but \code{var.obs.residuals} and \code{var.residuals} should always be the same.} - \item{msg}{ Any warning messages. This will be printed unless \code{object$control$trace = -1} (suppress all error messages). } - -} -\details{ - -This function returns the conditional expected value (mean) and variance of the one-step-ahead residuals. 'conditional' means in this context, conditioned on the observed data up to time \eqn{t-1} and a set of parameters. - -\strong{Model residuals} - -\eqn{\mathbf{v}_t}{v_t} is the difference between the data and the predicted data at time \eqn{t} given \eqn{\mathbf{x}_t}{x(t)}: -\deqn{ \mathbf{v}_t = \mathbf{y}_t - \mathbf{Z} \mathbf{x}_t - \mathbf{a} - \mathbf{D}\mathbf{d}_t}{ v(t) = y(t) - Z x(t) - a - D d(t)} -The observed model residuals \eqn{\hat{\mathbf{v}}_t}{hatv(t)} are the difference between the observed data and the predicted data at time \eqn{t} using the fitted model. \code{MARSSresiduals.tt1} fits the model using the data up to time \eqn{t-1}. 
So -\deqn{ \hat{\mathbf{v}}_t = \mathbf{y}_t - \mathbf{Z}\mathbf{x}_t^{t-1} - \mathbf{a} - \mathbf{D}\mathbf{d}_t}{ hatv(t) = y(t) - Z xtt1(t) - a - D d(t)} -where \eqn{\mathbf{x}_t^{t-1}}{xtt1(t)} is the expected value of \eqn{\mathbf{X}_t}{X(t)} conditioned on the data from \eqn{t=1} to \eqn{t-1} from the Kalman filter. \eqn{\mathbf{y}_t}{y(t)} are your data and missing values will appear as NA. - -\strong{State residuals} - -\eqn{\mathbf{w}_{t+1}}{w(t+1)} are the difference between the state at time \eqn{t+1} and the expected value of the state at time \eqn{t+1} given the state at time \eqn{t}: -\deqn{ \mathbf{w}_{t+1} = \mathbf{x}_{t+1} - \mathbf{B} \mathbf{x}_{t} - \mathbf{u} - \mathbf{C}\mathbf{c}_{t+1}}{ w(t+1) = x(t+1) - B x(t) - u - C c(t+1)} -The estimated state residuals \eqn{\hat{\mathbf{w}}_{t+1}}{hatw(t+1)} are the difference between the estimate of \eqn{\mathbf{x}_{t+1}}{x(t+1)} and the prediction of it using the estimate of \eqn{\mathbf{x}_{t}}{x(t)}. -\deqn{ \hat{\mathbf{w}}_{t+1} = \mathbf{x}_{t+1}^{t+1} - \mathbf{B}\mathbf{x}_{t}^t - \mathbf{u} - \mathbf{C}\mathbf{c}_{t+1}}{ hatw(t+1) = xtt(t+1) - B xtt(t) - u - C c(t+1)} -where \eqn{\mathbf{x}_{t+1}^{t+1}}{xtt(t+1)} is the Kalman filter estimate of the states at time \eqn{t+1} conditioned on the data up to time \eqn{t+1} and \eqn{\mathbf{x}_{t}^t}{xtt(t)} is the Kalman filter estimate of the states at time \eqn{t} conditioned on the data up to time \eqn{t}. -The estimated state residuals \eqn{\hat{\mathbf{w}}_{t+1}}{hatw(t+1)} are returned in \code{state.residuals} and rows \eqn{n+1} to \eqn{n+m} of \code{residuals}. \code{state.residuals[,t]} is \eqn{\hat{\mathbf{w}}_{t+1}}{hatw(t+1)} (notice time subscript difference). There are no NAs in the estimated state residuals (except for the last time step) as an estimate of the state exists whether or not there are associated data. - -\code{res1} and \code{res2} in the code below will be the same.
-\preformatted{dat <- t(harborSeal)[2:3,] -TT <- ncol(dat) -fit <- MARSS(dat) -B <- coef(fit, type="matrix")$B -U <- coef(fit, type="matrix")$U -xt <- MARSSkfss(fit)$xtt[,1:(TT-1)] # t 1 to TT-1 -xtp1 <- MARSSkfss(fit)$xtt[,2:TT] # t 2 to TT -res1 <- xtp1 - B \%*\% xt - U \%*\% matrix(1,1,TT-1) -res2 <- MARSSresiduals(fit, type="tt1")$state.residuals -} - -\strong{Joint residual variance} - -In a state-space model, \eqn{\mathbf{X}}{X} and \eqn{\mathbf{Y}}{Y} are stochastic, and the model and state residuals are random variables \eqn{\hat{\mathbf{V}}_t}{hatV(t)} and \eqn{\hat{\mathbf{W}}_{t+1}}{hatW(t+1)}. The joint distribution of \eqn{\hat{\mathbf{V}}_{t}, \hat{\mathbf{W}}_{t+1}}{hatV(t), hatW(t+1)} is the distribution across all the different possible data sets that our MARSS equations with parameters \eqn{\Theta}{Theta} might generate. Denote the matrix of \eqn{\hat{\mathbf{V}}_{t}, \hat{\mathbf{W}}_{t+1}}{hatV(t), hatW(t+1)}, as \eqn{\widehat{\mathcal{E}}_{t}}{Epsilon(t)}. That distribution has an expected value (mean) and variance: -\deqn{ \textrm{E}[\widehat{\mathcal{E}}_t] = 0; \textrm{var}[\widehat{\mathcal{E}}_t] = \hat{\Sigma}_t }{ E[Epsilon(t)] = 0; var[Epsilon(t)] = hatSigma(t)} -Our observed residuals \code{residuals} are one sample from this distribution. -To standardize the observed residuals, we will use \eqn{ \hat{\Sigma}_t }{ hatSigma(t) }. \eqn{ \hat{\Sigma}_t }{ hatSigma(t) } is returned in \code{var.residuals}. Rows/columns 1 to \eqn{n} are the conditional variances of the model residuals and rows/columns \eqn{n+1} to \eqn{n+m} are the conditional variances of the state residuals. The off-diagonal blocks are the covariances between the two types of residuals. For one-step-ahead residuals (unlike smoothation residuals \link{MARSSresiduals.tT}), the covariance is zero. - -\code{var.residuals} returned by this function is the conditional variance of the residuals conditioned on the data up to \eqn{t-1} and the parameter set \eqn{\Theta}{Theta}. 
The conditional variance for the model residuals is -\deqn{ \hat{\Sigma}_t = \mathbf{R}+\mathbf{Z}_t \mathbf{V}_t^{t-1} \mathbf{Z}_t^\top }{hatSigma(t) = R + Z Vtt1 t(Z)} -where \eqn{\mathbf{V}_t^{t-1}}{Vtt1} is the variance of \eqn{\mathbf{X}_t}{X(t)} conditioned on the data up to time \eqn{t-1}. This is returned by \code{\link{MARSSkf}} in \code{Vtt1}. The innovations variance is also returned in \code{Sigma} from \code{\link{MARSSkf}} and are used in the innovations form of the likelihood calculation. - -\strong{Standardized residuals} - -\code{std.residuals} are Cholesky standardized residuals. These are the residuals multiplied by the inverse of the lower triangle of the Cholesky decomposition of the variance matrix of the residuals: -\deqn{ \hat{\Sigma}_t^{-1/2} \hat{\mathbf{v}}_t}{ hatSigma(t)^{-1/2} hatv(t). } -These residuals are uncorrelated unlike marginal residuals. - -The interpretation of the Cholesky standardized residuals is not straight-forward when the \eqn{\mathbf{Q}}{Q} and \eqn{\mathbf{R}}{R} variance-covariance matrices are non-diagonal. The residuals which were generated by a non-diagonal variance-covariance matrices are transformed into orthogonal residuals in \eqn{\textrm{MVN}(0,\mathbf{I})}{MVN(0,I)} space. For example, if v is 2x2 correlated errors with variance-covariance matrix R. The transformed residuals (from this function) for the i-th row of v is a combination of the row 1 effect and the row 1 effect plus the row 2 effect. So in this case, row 2 of the transformed residuals would not be regarded as solely the row 2 residual but rather how different row 2 is from row 1, relative to expected. If the errors are highly correlated, then the Cholesky standardized residuals can look rather non-intuitive. - -\code{mar.residuals} are the marginal standardized residuals. 
These are the residuals multiplied by the inverse of the diagonal matrix formed from the square-root of the diagonal of the variance matrix of the residuals: -\deqn{ \textrm{dg}(\hat{\Sigma}_t)^{-1/2} \hat{\mathbf{v}}_t}{ dg(hatSigma(t))^{-1/2} hatv(t)}, where 'dg(A)' is the square matrix formed from the diagonal of A, aka \code{diag(diag(A))}. These residuals will be correlated if the variance matrix is non-diagonal. - -The Block Cholesky standardized residuals are like the Cholesky standardized residuals except that the full variance-covariance matrix is not used, only the variance-covariance matrix for the model or state residuals (respectively) is used for standardization. For the one-step-ahead case, the model and state residuals are independent (unlike in the smoothations case) thus the Cholesky and Block Cholesky standardized residuals will be identical (unlike in the smoothations case). - -\strong{Normalized residuals} - -If \code{normalize=FALSE}, the unconditional variance of \eqn{\mathbf{V}_t}{V(t)} and \eqn{\mathbf{W}_t}{W(t)} are \eqn{\mathbf{R}}{R} and \eqn{\mathbf{Q}}{Q} and the model is assumed to be written as -\deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{v}_t}{ y(t) = Z x(t) + a + v(t)} -\deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{w}_t}{ x(t) = B x(t-1) + u + w(t)} -If normalize=TRUE, the model is assumed to be written -\deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{H}\mathbf{v}_t}{ y(t) = Z x(t) + a + Hv(t)} -\deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{G}\mathbf{w}_t}{ x(t) = B x(t-1) + u + Gw(t)} -with the variance of \eqn{\mathbf{V}_t}{V(t)} and \eqn{\mathbf{W}_t}{W(t)} equal to \eqn{\mathbf{I}}{I} (identity). - -\code{MARSSresiduals} returns the residuals defined as in the first equations. To get the residuals defined as Harvey et al. (1998) define them (second equations), then use \code{normalize=TRUE}. 
In that case the unconditional variance of residuals will be \eqn{\mathbf{I}}{I} instead of \eqn{\mathbf{Q}}{Q} and \eqn{\mathbf{R}}{R}. Note that the normalized residuals are not the same as the standardized residuals. In the former, the unconditional residuals have a variance of \eqn{\mathbf{I}}{I} while in the latter it is the conditional residuals that have a variance of \eqn{\mathbf{I}}{I}. - -} - -\author{ - Eli Holmes, NOAA, Seattle, USA. -} -\seealso{ \code{\link{MARSSresiduals.tT}()}, \code{\link{MARSSresiduals.tt}()}, \code{\link{fitted.marssMLE}()}, \code{\link{plot.marssMLE}()} } -\examples{ - dat <- t(harborSeal) - dat <- dat[c(2,11),] - fit <- MARSS(dat) - - MARSSresiduals(fit, type="tt1")$std.residuals - residuals(fit, type="tt1") -} -\references{ -R. H. Shumway and D. S. Stoffer (2006). Section on the calculation of the likelihood of state-space models in Time series analysis and its applications. Springer-Verlag, New York. - -Holmes, E. E. 2014. Computation of standardized residuals for (MARSS) models. Technical Report. arXiv:1411.0045. -} - diff --git a/man/MARSSresiduals_ttt.Rd b/man/MARSSresiduals_ttt.Rd deleted file mode 100644 index 7bc17f9..0000000 --- a/man/MARSSresiduals_ttt.Rd +++ /dev/null @@ -1,91 +0,0 @@ -\name{MARSSresiduals.tt} -\alias{MARSSresiduals.tt} - -\title{ MARSS Contemporaneous Residuals } -\description{ - Calculates the standardized (or auxiliary) contemporaneous residuals, aka the residuals and their variance conditioned on the data up to time \eqn{t}. Contemporaneous residuals are only for the observations. Not exported. Access this function with \code{MARSSresiduals(object, type="tt")}. -} -\usage{ -MARSSresiduals.tt(object, method = c("SS"), normalize = FALSE, - silent = FALSE, fun.kf = c("MARSSkfas", "MARSSkfss")) -} -\arguments{ - \item{object}{ An object of class \code{\link{marssMLE}}.} - \item{method}{ Algorithm to use. Currently only "SS". } - \item{normalize}{ TRUE/FALSE See details.
} - \item{silent}{ If TRUE, don't print inversion warnings. } - \item{fun.kf}{ Can be ignored. This will change the Kalman filter/smoother function from the value in object$fun.kf if desired. } -} -\value{ -A list with the following components - \item{model.residuals}{ The observed contemporaneous model residuals: data minus the model predictions conditioned on the data 1 to t. A n x T matrix. NAs will appear where the data are missing. } - \item{state.residuals}{ All NA. There are no contemporaneous residuals for the states. } - \item{residuals}{ The residuals. \code{model.residuals} are in rows 1:n and \code{state.residuals} are in rows (n+1):(n+m). } - \item{var.residuals}{ The joint variance of the residuals conditioned on observed data from 1 to t. This only has values in the 1:n,1:n upper block for the model residuals. } - \item{std.residuals}{ The Cholesky standardized residuals as a n+m x T matrix. This is \code{residuals} multiplied by the inverse of the lower triangle of the Cholesky decomposition of \code{var.residuals}. The model standardized residuals associated with the missing data are replaced with NA. Note because the contemporaneous state residuals do not exist, rows (n+1):(n+m) are all NA. } - \item{mar.residuals}{ The marginal standardized residuals as a n+m x T matrix. This is \code{residuals} multiplied by the inverse of the diagonal matrix formed by the square-root of the diagonal of \code{var.residuals}. The model marginal residuals associated with the missing data are replaced with NA. } - \item{bchol.residuals}{ Because state residuals do not exist, this will be equivalent to the Cholesky standardized residuals, \code{std.residuals}. } - \item{E.obs.residuals}{ The expected value of the model residuals conditioned on the observed data 1 to t. Returned as a n x T matrix. } - \item{var.obs.residuals}{ The variance of the model residuals conditioned on the observed data. Returned as a n x n x T matrix. For observed data, this will be 0.
See \code{\link{MARSSresiduals.tT}()} for a discussion of these residuals and where they might be used. } - \item{msg}{ Any warning messages. This will be printed unless \code{object$control$trace = -1} (suppress all error messages). } - -} -\details{ - -This function returns the conditional expected value (mean) and variance of the model contemporaneous residuals. 'conditional' means in this context, conditioned on the observed data up to time \eqn{t} and a set of parameters. - -\strong{Model residuals} - -\eqn{\mathbf{v}_t}{v(t)} is the difference between the data and the predicted data at time \eqn{t} given \eqn{\mathbf{x}_t}{x(t)}: -\deqn{ \mathbf{v}_t = \mathbf{y}_t - \mathbf{Z} \mathbf{x}_t - \mathbf{a} - \mathbf{D}\mathbf{d}_{t}}{ v(t) = y(t) - Z x(t) - a - D d(t)} -The observed model residuals \eqn{\hat{\mathbf{v}}_t}{hatv(t)} are the difference between the observed data and the predicted data at time \eqn{t} using the fitted model. \code{MARSSresiduals.tt} fits the model using the data up to time \eqn{t}. So -\deqn{ \hat{\mathbf{v}}_t = \mathbf{y}_t - \mathbf{Z}\mathbf{x}_t^{t} - \mathbf{a} - \mathbf{D}\mathbf{d}_{t}}{ hatv(t) = y(t) - Z xtt(t) - a - D d(t)} -where \eqn{\mathbf{x}_t^{t}}{xtt(t)} is the expected value of \eqn{\mathbf{X}_t}{X(t)} conditioned on the data from 1 to \eqn{t} from the Kalman filter. \eqn{\mathbf{y}_t}{y(t)} are your data and missing values will appear as NA. These will be returned in \code{residuals}. - -\code{var.residuals} returned by the function is the conditional variance of the residuals conditioned on the data up to \eqn{t} and the parameter set \eqn{\Theta}{Theta}. The conditional variance is -\deqn{ \hat{\Sigma}_t = \mathbf{R}+\mathbf{Z} \mathbf{V}_t^{t} \mathbf{Z}^\top }{hatSigma(t) = R + Z Vtt t(Z)} -where \eqn{\mathbf{V}_t^{t}}{Vtt} is the variance of \eqn{\mathbf{X}_t}{X(t)} conditioned on the data up to time \eqn{t}. This is returned by \code{\link{MARSSkfss}} in \code{Vtt}.
- -\strong{Standardized residuals} - -\code{std.residuals} are Cholesky standardized residuals. These are the residuals multiplied by the inverse of the lower triangle of the Cholesky decomposition of the variance matrix of the residuals: -\deqn{ \hat{\Sigma}_t^{-1/2} \hat{\mathbf{v}}_t}{ hatSigma(t)^{-1/2} hatv(t) }. -These residuals are uncorrelated unlike marginal residuals. - -The interpretation of the Cholesky standardized residuals is not straight-forward when the \eqn{\mathbf{Q}}{Q} and \eqn{\mathbf{R}}{R} variance-covariance matrices are non-diagonal. The residuals which were generated by a non-diagonal variance-covariance matrices are transformed into orthogonal residuals in \eqn{\textrm{MVN}(0,\mathbf{I})}{MVN(0,I)} space. For example, if v is 2x2 correlated errors with variance-covariance matrix R. The transformed residuals (from this function) for the i-th row of v is a combination of the row 1 effect and the row 1 effect plus the row 2 effect. So in this case, row 2 of the transformed residuals would not be regarded as solely the row 2 residual but rather how different row 2 is from row 1, relative to expected. If the errors are highly correlated, then the Cholesky standardized residuals can look rather non-intuitive. - -\code{mar.residuals} are the marginal standardized residuals. These are the residuals multiplied by the inverse of the diagonal matrix formed from the square-root of the diagonal of the variance matrix of the residuals: -\deqn{ \textrm{dg}(\hat{\Sigma}_t)^{-1/2} \hat{\mathbf{v}}_t}{ dg(hatSigma(t))^{-1/2} hatv(t)}, where 'dg(A)' is the square matrix formed from the diagonal of A, aka \code{diag(diag(A))}. These residuals will be correlated if the variance matrix is non-diagonal. 
- -\strong{Normalized residuals} - -If \code{normalize=FALSE}, the unconditional variance of \eqn{\mathbf{V}_t}{V(t)} and \eqn{\mathbf{W}_t}{W(t)} are \eqn{\mathbf{R}}{R} and \eqn{\mathbf{Q}}{Q} and the model is assumed to be written as -\deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{v}_t}{ y(t) = Z x(t) + a + v(t)} -\deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{w}_t}{ x(t) = B x(t-1) + u + w(t)} -If normalize=TRUE, the model is assumed to be written -\deqn{\mathbf{y}_t = \mathbf{Z} \mathbf{x}_t + \mathbf{a} + \mathbf{H}\mathbf{v}_t}{ y(t) = Z x(t) + a + Hv(t)} -\deqn{\mathbf{x}_t = \mathbf{B} \mathbf{x}_{t-1} + \mathbf{u} + \mathbf{G}\mathbf{w}_t}{ x(t) = B x(t-1) + u + Gw(t)} -with the variance of \eqn{\mathbf{V}_t}{V(t)} and \eqn{\mathbf{W}_t}{W(t)} equal to \eqn{\mathbf{I}}{I} (identity). - -\code{MARSSresiduals()} returns the residuals defined as in the first equations. To get normalized residuals (second equation) as used in Harvey et al. (1998), then use \code{normalize=TRUE}. In that case the unconditional variance of residuals will be \eqn{\mathbf{I}}{I} instead of \eqn{\mathbf{R}}{R} and \eqn{\mathbf{Q}}{Q}. Note, that the normalized residuals are not the same as the standardized residuals. In former, the unconditional residuals have a variance of \eqn{\mathbf{I}}{I} while in the latter it is the conditional residuals that have a variance of \eqn{\mathbf{I}}{I}. - -} - -\author{ - Eli Holmes, NOAA, Seattle, USA. -} -\seealso{ \code{\link{MARSSresiduals.tT}()}, \code{\link{MARSSresiduals.tt1}()}, \code{\link{fitted.marssMLE}()}, \code{\link{plot.marssMLE}()} } -\examples{ - dat <- t(harborSeal) - dat <- dat[c(2,11),] - fit <- MARSS(dat) - - # Returns a matrix - MARSSresiduals(fit, type="tt")$std.residuals - # Returns a data frame in long form - residuals(fit, type="tt") -} -\references{ -Holmes, E. E. 2014. Computation of standardized residuals for (MARSS) models. Technical Report. arXiv:1411.0045. 
-} - diff --git a/man/MARSSsimulate.Rd b/man/MARSSsimulate.Rd deleted file mode 100644 index 2fbf383..0000000 --- a/man/MARSSsimulate.Rd +++ /dev/null @@ -1,50 +0,0 @@ -\name{MARSSsimulate} -\alias{MARSSsimulate} -\alias{simulate.marssMLE} -\title{ Simulate Data from a MARSS Model } -\description{ - Generates simulated data from a MARSS model with specified parameter estimates. This is a base function in the \code{\link{MARSS-package}}. -} -\usage{ -MARSSsimulate(object, tSteps = NULL, nsim = 1, silent = TRUE, - miss.loc = NULL) -} -\arguments{ - \item{object}{ A fitted \code{\link{marssMLE}} object, as output by \code{\link{MARSS}()}. } - \item{tSteps}{ Number of time steps in each simulation. If left off, it is taken to be consistent with \code{MLEobj}.} - \item{nsim}{ Number of simulated data sets to generate. } - \item{silent}{ Suppresses progress bar. } - \item{miss.loc}{ Optional matrix specifying where to put missing values. See Details. } -} -\details{ - Optional argument \code{miss.loc} is an array of dimensions n x tSteps x nsim, specifying where to put missing values - in the simulated data. If missing, this would be constructed using \code{MLEobj$marss$data}. If the locations of the missing values are the same for all simulations, \code{miss.loc} can be a matrix of \code{dim=c(n, tSteps)} (the original data for example). The default, if \code{miss.loc} is left off, is that there are no missing values even if \code{MLEobj$marss$data} has missing values. -} -\value{ - \item{sim.states}{ Array (dim m x tSteps x nsim) of state processes simulated from parameter estimates. m is the number of states (rows in X).} - \item{sim.data}{ Array (dim n x tSteps x nsim) of data simulated from parameter estimates. n is the number of rows of data (Y).} - \item{MLEobj}{ The \code{\link{marssMLE}} object from which the data were simulated. } - \item{miss.loc}{ Matrix identifying where missing values were placed. It should be exactly the same dimensions as the data matrix. 
The location of NAs in the miss.loc matrix indicate where the missing values are. } - \item{tSteps}{ Number of time steps in each simulation. } - \item{nsim}{ Number of simulated data sets generated. } -} -\author{ - Eli Holmes and Eric Ward, NOAA, Seattle, USA. -} -\seealso{ - \code{\link{marssMODEL}}, \code{\link{marssMLE}}, \code{\link{MARSSboot}()} -} -\examples{ -d <- harborSeal[, c(2, 11)] -dat <- t(d) -fit <- MARSS(dat) - -# simulate data that are the -# same length as original data and no missing data -sim.obj <- MARSSsimulate(fit, tSteps = dim(d)[1], nsim = 5) - -# simulate data that are the -# same length as original data and have missing data in the same location -sim.obj <- MARSSsimulate(fit, tSteps = dim(d)[1], nsim = 5, miss.loc = dat) -} - diff --git a/man/MARSSvectorizeparam.Rd b/man/MARSSvectorizeparam.Rd deleted file mode 100644 index 94cb971..0000000 --- a/man/MARSSvectorizeparam.Rd +++ /dev/null @@ -1,34 +0,0 @@ -\name{MARSSvectorizeparam} -\alias{MARSSvectorizeparam} -\keyword{internal} - -\title{ Vectorize or Replace the par List } -\description{ - Converts \code{MLEobj[["what"]]} to a vector or assigns a vector to \code{MLEobj[["what"]]}. This is a utility function in the \code{\link{MARSS-package}} for \code{\link{marssMODEL}} objects of form="marss" and is not exported. Users achieve this functionality with \code{\link[=coef.marssMLE]{coef}}. -} -\usage{ -MARSSvectorizeparam(MLEobj, parvec = NA, what = "par") -} -\arguments{ - \item{MLEobj}{ An object of class \code{\link{marssMLE}}. } - \item{parvec}{ NA or a vector. See Value. } - \item{what}{ What part of the MLEobj is being replaced or vectorized. Need to be a par list. } -} -\details{ - Utility function to generate parameter vectors for optimization functions, and to set \code{MLEobj[[what]]} using a vector of values. The function bases the unlisting and naming order on \code{names(MLEobj$marss$fixed)}. Appends matrix name to the row names in the par list. 
-} -\value{ - If parvec=NA, a vector of the elements of the \code{what} element. Otherwise, a \code{\link{marssMLE}} object with \code{MLEobj[["what"]]} set by parvec. -} -\author{ - Eli Holmes and Kellie Wills, NOAA, Seattle, USA. -} -\seealso{ \code{\link{marssMLE}} } -\examples{ -dat <- t(harborSealWA) -dat <- dat[2:4, ] -kem <- MARSS(dat) -paramvec <- MARSS:::MARSSvectorizeparam(kem) -paramvec -} -