predict() for glm() and glmm()

Ben Bolker proposed a very efficient way to predict the confidence interval for result of glmer() (library lme4) (fonction easyPredCI() in http://ms.mcmaster.ca/~bolker/R/misc/foxchapter/bolker_chap.html).
However, the code provided by Ben Bolker does not work for factors included in fixed effects. Furthermore, I expand the function to be used also with results of glmmPQL() (library MASS), glmmadmb() (library glmmADMB) or glm().

Here are the modifications of the code and examples of its use (in green, the new parts):

easyPredCI <- function(model, newdata=NULL, alpha=0.05) {
  # Marc Girondot - 2016-01-09
  if (is.null(newdata)) {
    if (any(class(model)=="glmerMod")) newdata <- model@frame
    if (any(class(model)=="glmmPQL") | any(class(model)=="glm")) newdata <- model$data
    if (any(class(model)=="glmmadmb")) newdata <- model$frame
  }
  
  ## baseline prediction, on the linear predictor scale:
  pred0 <- predict(model, re.form=NA, newdata=newdata)
  ## fixed-effects model matrix for new data
    if (any(class(model)=="glmmadmb")) {
  X <- model.matrix(delete.response(model$terms), newdata)
    } else {
  X <- model.matrix(formula(model,fixed.only=TRUE)[-2],
                    newdata)
    }

  if (any(class(model)=="glm")) {
    # Marc Girondot - 2016-01-09
    # Note that beta is not used
    beta <- model$coefficients
  } else {
    beta <- fixef(model) ## fixed-effects coefficients
  }

  V <- vcov(model)     ## variance-covariance matrix of beta
  
  # Marc Girondot - 2016-01-09
  if (any(!(colnames(V) %in% colnames(X)))) {
    dfi <- matrix(data = rep(0, dim(X)[1]*sum(!(colnames(V) %in% colnames(X)))), nrow = dim(X)[1])
    colnames(dfi) <- colnames(V)[!(colnames(V) %in% colnames(X))]
    X <- cbind(X, dfi)
  }
  
  pred.se <- sqrt(diag(X %*% V %*% t(X))) ## std errors of predictions

    ## inverse-link function
  # Marc Girondot - 2016-01-09
  if (any(class(model)=="glmmPQL") | any(class(model)=="glm")) linkinv <- model$family$linkinv
  if (any(class(model)=="glmerMod")) linkinv <- model@resp$family$linkinv
  if (any(class(model)=="glmmadmb")) linkinv <- model$ilinkfun

  ## construct 95% Normal CIs on the link scale and
  ##  transform back to the response (probability) scale:
  crit <- -qnorm(alpha/2)
  linkinv(cbind(lwr=pred0-crit*pred.se,
                upr=pred0+crit*pred.se))
}

• Example with glmer():

# I generate 1000 random numbers from a Poisson distribution
x <- rpois(1000, 3)
# I generate a data.frame with x, but also continuous or discrete covariates
df <- data.frame(x=x, y1=rnorm(1000, 1, 1)*(x+1)^0.5, 
                 y2=(rnorm(1000, 10, 5)*log(x+2))/10, 
                 y3=ifelse(runif(1000, 0, 1)*x/3>0.5, "A", "B"))
# I check if all is ok
head(df)
# load the library lme4
library("lme4")
# Estimate the parameters using glmer
e <- glmer(x ~ y1 + y2 + y3:y2 + y1:y2 + (1|y3), 
           data=df, family=poisson, 
           control=glmerControl(optimizer = "bobyqa", 
                                optCtrl = list(maxfun=100000)), 
           verbose=2)
# Set of data for predict
newdata_e <- data.frame(y1=rnorm(10, 1, 1), 
                  y2=rnorm(10, 1, 1), 
                  y3=ifelse(runif(10, 0, 1)>0.5, "A", "B"))
# Run the prediction
(pe <- easyPredCI(e, newdata=newdata_e))
# You can use easyPredCI() with the data used for fitting
(pe <- easyPredCI(e))


And the result is:

        lwr      upr
1  2.864613 8.413628
2  1.467924 4.124212
3  1.418995 3.998853
4  1.564421 4.404096
5  2.030049 5.755788
6  1.579275 4.446113
7  1.428897 4.029080
8  2.297845 6.571171
9  2.231492 6.344858
10 1.274041 3.603743

• Example with glmmPQL:

e <- glmmPQL(x ~ y1 + y2 + y3:y2 + y1:y2, random= ~ 1|y3, data=df, family=poisson)

(pe <- easyPredCI(e, newdata=newdata_e))

And the result is:

       lwr      upr
B 1.012495 3.515004
A 1.905914 6.632771
A 2.385655 8.491739
B 1.371221 4.773188
A 1.684377 5.888121
A 2.125170 7.412723
A 1.736507 6.049251
B 1.750232 6.156711
B 1.022459 3.545627
A 2.189937 7.651001

Example with glm:

e <- glm(x ~ y1 + y2 + y3:y2 + y1:y2, data=df, family=poisson)

(pe <- easyPredCI(e, newdata=newdata_e))

        lwr      upr
1  2.125692 2.379494
2  2.769475 3.178537
3  5.066256 6.265885
4  2.556010 2.856360
5  1.908120 2.356567
6  3.575420 4.026811
7  1.849100 2.217189
8  2.653758 3.128261
9  2.380577 2.679053
10 3.838240 4.336429

Example with glmmadmb:

# term y3 being a random factor cannot be also a fixed factor
e_ADMB <- glmmadmb(x ~ y1 + y2 + y1:y2 + (1|y3), data = df, 
                   family = "poisson")

(pe_ADMB <- easyPredCI(e_ADMB, newdata=newdata_e))

         lwr      upr
1  1.4786121 2.990365
2  1.4044935 2.870856
3  0.9682311 1.984513
4  1.4627873 3.107534
5  1.5432534 3.114533
6  1.4482634 2.966371
7  1.3147120 2.651343
8  2.4061769 5.121426
9  2.2069071 4.659780
10 1.0592681 2.167304

An alternative is using bootstrap, but it is very long:

e <- glmer(x ~ y1 + y2 + y3:y2 + y1:y2 + (1|y3), data=df, family=poisson, 
           control=glmerControl(optimizer = "bobyqa", 
                                optCtrl = list(maxfun=100000)), verbose=2)
newdata_e <- data.frame(y1=rnorm(10, 1, 1), 
                        y2=rnorm(10, 1, 1), 
                        y3=ifelse(runif(10, 0, 1)>0.5, "A", "B"))
bb <- bootMer(e,
              FUN=function(x)
                predict(x,re.form=NA,newdata=newdata_e,
                        type="response"),
              nsim=500, seed=101,use.u=TRUE)

cpredboot1.CI <- t(apply(bb$t,2,quantile,c(0.025,0.975),na.rm=TRUE))

Note that these codes do not take into account variability of the random effects. To include all the random effects:

bb <- bootMer(e,
              FUN=function(x)
                predict(x,re.form=NULL,newdata=newdata_e,
                        type="response"),
              nsim=500, seed=101,use.u=TRUE)


cpredboot1.CI <- t(apply(bb$t,2,quantile,c(0.025,0.975),na.rm=TRUE))

Commentaires

Posts les plus consultés de ce blog

Standard error from Hessian Matrix... what can be done when problem occurs

stepAIC from package MASS with AICc

Install treemix in ubuntu 20.04