--- title: "Hierarchical Models with lme4, INLA, and Stan" author: "Eugenio Paglino" date: "\today" output: html_document --- ```{r, echo=F, include=F} knitr::opts_chunk$set( echo = FALSE, warning = FALSE, message = FALSE ) ``` ```{r} # Loading necessary packages library(lubridate) library(here) library(rstan) library(INLA) library(rstanarm) library(lme4) library(tidyverse) # Set seed for the Rmd set.seed(42) ``` ```{r} # Do not rely on this to completely clean your environment # Better to do a full restart of R before running rm(list=ls()) here::i_am('R/HLMExample.Rmd') inDir <- here::here('data') ``` ```{r} # Reading the data alc.data <- read_csv(here::here(inDir,'alcohol1_pp.csv')) ``` ```{r} # Plotting individual level trajectories (first 9 individuals) alc.data %>% filter(id<=9) %>% ggplot() + geom_point(mapping = aes(x=age,y=alcuse)) + geom_smooth(mapping = aes(x=age,y=alcuse), method='lm',se=F,color='grey') + facet_wrap(~ id) + labs(x='Age', y='Alcohol Use') + theme_minimal() ``` ```{r} # Creating time vector to store running times of different packages time.taken <- c() ``` ```{r} # Estimating the model with lmer start.time <- Sys.time() model.lme <- alcData %>% lmer(alcuse ~ 1 + age_14 + cpeer + ccoa + (1 | id) + (0 + age_14 | id), data=.) end.time <- Sys.time() time.taken[1] <- as.numeric(end.time - start.time,units='secs') ``` ```{r} # Estimating the model with stanarm (one core) start.time <- Sys.time() model.stanarm <- alcData %>% stan_lmer(alcuse ~ 1 + age_14 + cpeer + ccoa + (1 | id) + (0 + age_14 | id), data=.) end.time <- Sys.time() time.taken[2] <- as.numeric(end.time - start.time,units='secs') ``` ```{r} # Set Stan options for using all available cores options(mc.cores = parallel::detectCores()) rstan_options(auto_write = TRUE) # Estimating the model with stanarm (four cores) start.time <- Sys.time() model.stanarm <- alcData %>% stan_lmer(alcuse ~ 1 + age_14 + cpeer + ccoa + (1 | id) + (0 + age_14 | id), data=.) end.time <- Sys.time() time.taken[3] <- as.numeric(end.time - start.time,units='secs') ``` ```{r} # Estimating the model with inla (default prior) start.time <- Sys.time() model.inla <- alcData %>% mutate(id2 = id) %>% inla(alcuse ~ 1 + age_14 + cpeer + ccoa + f(id, model='iid') + f(id2, age_14, model='iid'), control.compute=list(config = TRUE), data=.) end.time <- Sys.time() time.taken[4] <- as.numeric(end.time - start.time,units='secs') ``` ```{r} # retrieve intercept means from lmer model inters.lme <- ranef(model.lme)$id[,1] # retrieve intercept means and intervals from stanarm model inters.means.stanarm <- apply(as.matrix(model.stanarm)[,seq(5,4+82,1)], M=2,function(x) mean(x)) inters.05.stanarm <- apply(as.matrix(model.stanarm)[,seq(5,4+82,1)], M=2,function(x) quantile(x,0.05)) inters.95.stanarm <- apply(as.matrix(model.stanarm)[,seq(5,4+82,1)], M=2,function(x) quantile(x,0.95)) # retrieve intercept means and intervals from inla model N <- 1000 sim <- inla.posterior.sample(N, model.inla) model.names <- row.names(sim[[1]]$latent) y.names <- model.names[1:nrow(alc.data)] param.names <- model.names[-c(1:nrow(alc.data))] param.names <- param.names[c(length(param.names),1:(length(param.names)-1))] inter.names <- sapply(1:82,function(x) paste0('id:',x)) inters.matrix.inla <- t(sapply(sim,function(x) x$latent[inter.names,1])) inters.means.inla <- apply(inters.matrix.inla,M=2,function(x) mean(x)) inters.05.inla <- apply(inters.matrix.inla,M=2,function(x) quantile(x,0.05)) inters.95.inla <- apply(inters.matrix.inla,M=2,function(x) quantile(x,0.95)) ``` ```{r} # plot estimates intercepts for the three packages ggplot() + geom_point(mapping=aes(x=1:82,y=inters.lme,color='lme')) + geom_point(mapping=aes(x=1:82,y=inters.means.stanarm,color='stan')) + geom_errorbar(mapping=aes(x=1:82, ymin=inters.05.stanarm, ymax=inters.95.stanarm, color='stan')) + geom_point(mapping=aes(x=1:82,y=inters.means.inla,color='inla')) + geom_errorbar(mapping=aes(x=1:82, ymin=inters.05.inla, ymax=inters.95.inla, color='inla')) + labs(x='Participant ID', y='ID-Specific Deviation from Grand Mean', color = '') + theme_minimal() ``` ```{r} # Estimating the model with inla (more informative prior, inducing more shrinking) model.inla <- alcData %>% mutate(id2 = id) %>% inla(alcuse ~ 1 + age_14 + cpeer + ccoa + f(id, model='iid', hyper = list(prec = list(param = c(5, 0.1)))) + f(id2, age_14, model='iid'), control.compute=list(config = TRUE), data=.) ``` ```{r} # retrieve intercept means and intervals from the new inla model N <- 1000 sim <- inla.posterior.sample(N, model.inla) model.names <- row.names(sim[[1]]$latent) y.names <- model.names[1:nrow(alc.data)] param.names <- model.names[-c(1:nrow(alc.data))] param.names <- param.names[c(length(param.names),1:(length(param.names)-1))] inter.names <- sapply(1:82,function(x) paste0('id:',x)) inters.matrix.inla <- t(sapply(sim,function(x) x$latent[inter.names,1])) inters.means.inla <- apply(inters.matrix.inla,M=2,function(x) mean(x)) inters.05.inla <- apply(inters.matrix.inla,M=2,function(x) quantile(x,0.05)) inters.95.inla <- apply(inters.matrix.inla,M=2,function(x) quantile(x,0.95)) ``` ```{r} # plot estimates intercepts for the lme and inla, showing the different # level of shrinkage ggplot() + geom_point(mapping=aes(x=1:82,y=inters.lme,color='lme')) + geom_point(mapping=aes(x=1:82,y=inters.means.inla,color='inla')) + geom_errorbar(mapping=aes(x=1:82, ymin=inters.05.inla, ymax=inters.95.inla, color='inla')) + labs(x='Participant ID', y='ID-Specific Deviation from Grand Mean', color = '') + theme_minimal() ```