predict() for glm() and glmm()

février 10, 2016

Ben Bolker proposed a very efficient way to predict the confidence interval for result of glmer() (library lme4) (fonction easyPredCI() in http://ms.mcmaster.ca/~bolker/R/misc/foxchapter/bolker_chap.html).

However, the code provided by Ben Bolker does not work for factors included in fixed effects. Furthermore, I expand the function to be used also with results of glmmPQL() (library MASS), glmmadmb() (library glmmADMB) or glm().

Here are the modifications of the code and examples of its use (in green, the new parts):

easyPredCI <- function(model, newdata=NULL, alpha=0.05) {

  # Marc Girondot - 2016-01-09

  if (is.null(newdata)) {

    if (any(class(model)=="glmerMod")) newdata <- model@frame

    if (any(class(model)=="glmmPQL") | any(class(model)=="glm")) newdata <- model$data

    if (any(class(model)=="glmmadmb")) newdata <- model$frame

}

  ## baseline prediction, on the linear predictor scale:

  pred0 <- predict(model, re.form=NA, newdata=newdata)

  ## fixed-effects model matrix for new data

    if (any(class(model)=="glmmadmb")) {

  X <- model.matrix(delete.response(model$terms), newdata)

    } else {

  X <- model.matrix(formula(model,fixed.only=TRUE)[-2],

                    newdata)

}

  if (any(class(model)=="glm")) {

    # Marc Girondot - 2016-01-09

    # Note that beta is not used

    beta <- model$coefficients

  } else {

    beta <- fixef(model) ## fixed-effects coefficients

}

  V <- vcov(model)     ## variance-covariance matrix of beta

  # Marc Girondot - 2016-01-09

  if (any(!(colnames(V) %in% colnames(X)))) {

    dfi <- matrix(data = rep(0, dim(X)[1]*sum(!(colnames(V) %in% colnames(X)))), nrow = dim(X)[1])

    colnames(dfi) <- colnames(V)[!(colnames(V) %in% colnames(X))]

    X <- cbind(X, dfi)

}

  pred.se <- sqrt(diag(X %*% V %*% t(X))) ## std errors of predictions

    ## inverse-link function

  # Marc Girondot - 2016-01-09

  if (any(class(model)=="glmmPQL") | any(class(model)=="glm")) linkinv <- model$family$linkinv

  if (any(class(model)=="glmerMod")) linkinv <- model@resp$family$linkinv

  if (any(class(model)=="glmmadmb")) linkinv <- model$ilinkfun

  ## construct 95% Normal CIs on the link scale and

  ##  transform back to the response (probability) scale:

  crit <- -qnorm(alpha/2)

  linkinv(cbind(lwr=pred0-crit*pred.se,

                upr=pred0+crit*pred.se))

}

• Example with glmer():

# I generate 1000 random numbers from a Poisson distribution

x <- rpois(1000, 3)

# I generate a data.frame with x, but also continuous or discrete covariates

df <- data.frame(x=x, y1=rnorm(1000, 1, 1)*(x+1)^0.5, 

                 y2=(rnorm(1000, 10, 5)*log(x+2))/10, 

                 y3=ifelse(runif(1000, 0, 1)*x/3>0.5, "A", "B"))

# I check if all is ok

head(df)

# load the library lme4

library("lme4")

# Estimate the parameters using glmer

e <- glmer(x ~ y1 + y2 + y3:y2 + y1:y2 + (1|y3), 

           data=df, family=poisson, 

           control=glmerControl(optimizer = "bobyqa", 

                                optCtrl = list(maxfun=100000)), 

           verbose=2)

# Set of data for predict

newdata_e <- data.frame(y1=rnorm(10, 1, 1), 

                  y2=rnorm(10, 1, 1), 

                  y3=ifelse(runif(10, 0, 1)>0.5, "A", "B"))

# Run the prediction

(pe <- easyPredCI(e, newdata=newdata_e))

# You can use easyPredCI() with the data used for fitting

(pe <- easyPredCI(e))

And the result is:

        lwr      upr

1  2.864613 8.413628

2  1.467924 4.124212

3  1.418995 3.998853

4  1.564421 4.404096

5  2.030049 5.755788

6  1.579275 4.446113

7  1.428897 4.029080

8  2.297845 6.571171

9  2.231492 6.344858

10 1.274041 3.603743

• Example with glmmPQL:

e <- glmmPQL(x ~ y1 + y2 + y3:y2 + y1:y2, random= ~ 1|y3, data=df, family=poisson)

(pe <- easyPredCI(e, newdata=newdata_e))

And the result is:

       lwr      upr

B 1.012495 3.515004

A 1.905914 6.632771

A 2.385655 8.491739

B 1.371221 4.773188

A 1.684377 5.888121

A 2.125170 7.412723

A 1.736507 6.049251

B 1.750232 6.156711

B 1.022459 3.545627

A 2.189937 7.651001

Example with glm:

e <- glm(x ~ y1 + y2 + y3:y2 + y1:y2, data=df, family=poisson)

(pe <- easyPredCI(e, newdata=newdata_e))

        lwr      upr

1  2.125692 2.379494

2  2.769475 3.178537

3  5.066256 6.265885

4  2.556010 2.856360

5  1.908120 2.356567

6  3.575420 4.026811

7  1.849100 2.217189

8  2.653758 3.128261

9  2.380577 2.679053

10 3.838240 4.336429

Example with glmmadmb:

# term y3 being a random factor cannot be also a fixed factor

e_ADMB <- glmmadmb(x ~ y1 + y2 + y1:y2 + (1|y3), data = df, 

                   family = "poisson")

(pe_ADMB <- easyPredCI(e_ADMB, newdata=newdata_e))

         lwr      upr

1  1.4786121 2.990365

2  1.4044935 2.870856

3  0.9682311 1.984513

4  1.4627873 3.107534

5  1.5432534 3.114533

6  1.4482634 2.966371

7  1.3147120 2.651343

8  2.4061769 5.121426

9  2.2069071 4.659780

10 1.0592681 2.167304

An alternative is using bootstrap, but it is very long:

e <- glmer(x ~ y1 + y2 + y3:y2 + y1:y2 + (1|y3), data=df, family=poisson, 

           control=glmerControl(optimizer = "bobyqa", 

                                optCtrl = list(maxfun=100000)), verbose=2)

newdata_e <- data.frame(y1=rnorm(10, 1, 1), 

                        y2=rnorm(10, 1, 1), 

                        y3=ifelse(runif(10, 0, 1)>0.5, "A", "B"))

bb <- bootMer(e,

              FUN=function(x)

                predict(x,re.form=NA,newdata=newdata_e,

                        type="response"),

              nsim=500, seed=101,use.u=TRUE)

cpredboot1.CI <- t(apply(bb$t,2,quantile,c(0.025,0.975),na.rm=TRUE))

Note that these codes do not take into account variability of the random effects. To include all the random effects:

bb <- bootMer(e,

              FUN=function(x)

                predict(x,re.form=NULL,newdata=newdata_e,

                        type="response"),

              nsim=500, seed=101,use.u=TRUE)

cpredboot1.CI <- t(apply(bb$t,2,quantile,c(0.025,0.975),na.rm=TRUE))

Rechercher dans ce blog

BiostatR Blog

predict() for glm() and glmm()

Commentaires

Enregistrer un commentaire

Posts les plus consultés de ce blog

Standard error from Hessian Matrix... what can be done when problem occurs

Install treemix in ubuntu 20.04

Multivariable analysis and correlation of iconography