predict() for glm() and glmm()
Ben Bolker proposed a very efficient way to predict the confidence interval for result of glmer() (library lme4) (fonction easyPredCI() in http://ms.mcmaster.ca/~bolker/R/misc/foxchapter/bolker_chap.html).
However, the code provided by Ben Bolker does not work for factors included in fixed effects. Furthermore, I expand the function to be used also with results of glmmPQL() (library MASS), glmmadmb() (library glmmADMB) or glm().
Here are the modifications of the code and examples of its use (in green, the new parts):
easyPredCI <- function(model, newdata=NULL, alpha=0.05) {
# Marc Girondot - 2016-01-09
if (is.null(newdata)) {
if (any(class(model)=="glmerMod")) newdata <- model@frame
if (any(class(model)=="glmmPQL") | any(class(model)=="glm")) newdata <- model$data
if (any(class(model)=="glmmadmb")) newdata <- model$frame
}
## baseline prediction, on the linear predictor scale:
pred0 <- predict(model, re.form=NA, newdata=newdata)
## fixed-effects model matrix for new data
if (any(class(model)=="glmmadmb")) {
X <- model.matrix(delete.response(model$terms), newdata)
} else {
X <- model.matrix(formula(model,fixed.only=TRUE)[-2],
newdata)
}
if (any(class(model)=="glm")) {
# Marc Girondot - 2016-01-09
# Note that beta is not used
beta <- model$coefficients
} else {
beta <- fixef(model) ## fixed-effects coefficients
}
V <- vcov(model) ## variance-covariance matrix of beta
# Marc Girondot - 2016-01-09
if (any(!(colnames(V) %in% colnames(X)))) {
dfi <- matrix(data = rep(0, dim(X)[1]*sum(!(colnames(V) %in% colnames(X)))), nrow = dim(X)[1])
colnames(dfi) <- colnames(V)[!(colnames(V) %in% colnames(X))]
X <- cbind(X, dfi)
}
pred.se <- sqrt(diag(X %*% V %*% t(X))) ## std errors of predictions
## inverse-link function
# Marc Girondot - 2016-01-09
if (any(class(model)=="glmmPQL") | any(class(model)=="glm")) linkinv <- model$family$linkinv
if (any(class(model)=="glmerMod")) linkinv <- model@resp$family$linkinv
if (any(class(model)=="glmmadmb")) linkinv <- model$ilinkfun
## construct 95% Normal CIs on the link scale and
## transform back to the response (probability) scale:
crit <- -qnorm(alpha/2)
linkinv(cbind(lwr=pred0-crit*pred.se,
upr=pred0+crit*pred.se))
}
• Example with glmer():
# I generate 1000 random numbers from a Poisson distribution
x <- rpois(1000, 3)
# I generate a data.frame with x, but also continuous or discrete covariates
df <- data.frame(x=x, y1=rnorm(1000, 1, 1)*(x+1)^0.5,
y2=(rnorm(1000, 10, 5)*log(x+2))/10,
y3=ifelse(runif(1000, 0, 1)*x/3>0.5, "A", "B"))
# I check if all is ok
head(df)
# load the library lme4
library("lme4")
# Estimate the parameters using glmer
e <- glmer(x ~ y1 + y2 + y3:y2 + y1:y2 + (1|y3),
data=df, family=poisson,
control=glmerControl(optimizer = "bobyqa",
optCtrl = list(maxfun=100000)),
verbose=2)
# Set of data for predict
newdata_e <- data.frame(y1=rnorm(10, 1, 1),
y2=rnorm(10, 1, 1),
y3=ifelse(runif(10, 0, 1)>0.5, "A", "B"))
# Run the prediction
(pe <- easyPredCI(e, newdata=newdata_e))
# You can use easyPredCI() with the data used for fitting
(pe <- easyPredCI(e))
And the result is:
lwr upr
1 2.864613 8.413628
2 1.467924 4.124212
3 1.418995 3.998853
4 1.564421 4.404096
5 2.030049 5.755788
6 1.579275 4.446113
7 1.428897 4.029080
8 2.297845 6.571171
9 2.231492 6.344858
10 1.274041 3.603743
• Example with glmmPQL:
e <- glmmPQL(x ~ y1 + y2 + y3:y2 + y1:y2, random= ~ 1|y3, data=df, family=poisson)
(pe <- easyPredCI(e, newdata=newdata_e))
And the result is:
lwr upr
B 1.012495 3.515004
A 1.905914 6.632771
A 2.385655 8.491739
B 1.371221 4.773188
A 1.684377 5.888121
A 2.125170 7.412723
A 1.736507 6.049251
B 1.750232 6.156711
B 1.022459 3.545627
A 2.189937 7.651001
Example with glm:
e <- glm(x ~ y1 + y2 + y3:y2 + y1:y2, data=df, family=poisson)
(pe <- easyPredCI(e, newdata=newdata_e))
lwr upr
1 2.125692 2.379494
2 2.769475 3.178537
3 5.066256 6.265885
4 2.556010 2.856360
5 1.908120 2.356567
6 3.575420 4.026811
7 1.849100 2.217189
8 2.653758 3.128261
9 2.380577 2.679053
10 3.838240 4.336429
Example with glmmadmb:
# term y3 being a random factor cannot be also a fixed factor
e_ADMB <- glmmadmb(x ~ y1 + y2 + y1:y2 + (1|y3), data = df,
family = "poisson")
(pe_ADMB <- easyPredCI(e_ADMB, newdata=newdata_e))
lwr upr
1 1.4786121 2.990365
2 1.4044935 2.870856
3 0.9682311 1.984513
4 1.4627873 3.107534
5 1.5432534 3.114533
6 1.4482634 2.966371
7 1.3147120 2.651343
8 2.4061769 5.121426
9 2.2069071 4.659780
10 1.0592681 2.167304
An alternative is using bootstrap, but it is very long:
e <- glmer(x ~ y1 + y2 + y3:y2 + y1:y2 + (1|y3), data=df, family=poisson,
control=glmerControl(optimizer = "bobyqa",
optCtrl = list(maxfun=100000)), verbose=2)
newdata_e <- data.frame(y1=rnorm(10, 1, 1),
y2=rnorm(10, 1, 1),
y3=ifelse(runif(10, 0, 1)>0.5, "A", "B"))
bb <- bootMer(e,
FUN=function(x)
predict(x,re.form=NA,newdata=newdata_e,
type="response"),
nsim=500, seed=101,use.u=TRUE)
cpredboot1.CI <- t(apply(bb$t,2,quantile,c(0.025,0.975),na.rm=TRUE))
Note that these codes do not take into account variability of the random effects. To include all the random effects:
bb <- bootMer(e,
FUN=function(x)
predict(x,re.form=NULL,newdata=newdata_e,
type="response"),
nsim=500, seed=101,use.u=TRUE)
cpredboot1.CI <- t(apply(bb$t,2,quantile,c(0.025,0.975),na.rm=TRUE))
Commentaires
Enregistrer un commentaire