\documentclass{chapman}
%%% copy Sweave.sty definitions
%%% keeps `sweave' from adding `\usepackage{Sweave}': DO NOT REMOVE
%\usepackage{Sweave}
\RequirePackage[T1]{fontenc}
\RequirePackage{graphicx,ae,fancyvrb}
\IfFileExists{upquote.sty}{\RequirePackage{upquote}}{}
\usepackage{relsize}
\DefineVerbatimEnvironment{Sinput}{Verbatim}{}
\DefineVerbatimEnvironment{Soutput}{Verbatim}{fontfamily=courier,
fontshape=it,
fontsize=\relsize{-1}}
\DefineVerbatimEnvironment{Scode}{Verbatim}{}
\newenvironment{Schunk}{}{}
%%% environment for raw output
\newcommand{\SchunkRaw}{\renewenvironment{Schunk}{}{}
\DefineVerbatimEnvironment{Soutput}{Verbatim}{fontfamily=courier,
fontshape=it,
fontsize=\small}
\rawSinput
}
%%% environment for labeled output
\newcommand{\nextcaption}{}
\newcommand{\SchunkLabel}{
\renewenvironment{Schunk}{\begin{figure}[ht] }{\caption{\nextcaption}
\end{figure} }
\DefineVerbatimEnvironment{Sinput}{Verbatim}{frame = topline}
\DefineVerbatimEnvironment{Soutput}{Verbatim}{frame = bottomline,
samepage = true,
fontfamily=courier,
fontshape=it,
fontsize=\relsize{-1}}
}
%%% S code with line numbers
\DefineVerbatimEnvironment{Sinput}
{Verbatim}
{
%% numbers=left
}
\newcommand{\numberSinput}{
\DefineVerbatimEnvironment{Sinput}{Verbatim}{numbers=left}
}
\newcommand{\rawSinput}{
\DefineVerbatimEnvironment{Sinput}{Verbatim}{}
}
%%% R / System symbols
\newcommand{\R}{\textsf{R}}
\newcommand{\rR}{{R}}
\renewcommand{\S}{\textsf{S}}
\newcommand{\SPLUS}{\textsf{S-PLUS}}
\newcommand{\rSPLUS}{{S-PLUS}}
\newcommand{\SPSS}{\textsf{SPSS}}
\newcommand{\EXCEL}{\textsf{Excel}}
\newcommand{\ACCESS}{\textsf{Access}}
\newcommand{\SQL}{\textsf{SQL}}
%%\newcommand{\Rpackage}[1]{\hbox{\rm\textit{#1}}}
%%\newcommand{\Robject}[1]{\hbox{\rm\texttt{#1}}}
%%\newcommand{\Rclass}[1]{\hbox{\rm\textit{#1}}}
%%\newcommand{\Rcmd}[1]{\hbox{\rm\texttt{#1}}}
\newcommand{\Rpackage}[1]{\index{#1 package@{\fontseries{b}\selectfont #1} package} {\fontseries{b}\selectfont #1}}
\newcommand{\rpackage}[1]{{\fontseries{b}\selectfont #1}}
\newcommand{\Robject}[1]{\texttt{#1}}
\newcommand{\Rclass}[1]{\index{#1 class@\textit{#1} class}\textit{#1}}
\newcommand{\Rcmd}[1]{\index{#1 function@\texttt{#1} function}\texttt{#1}}
\newcommand{\Roperator}[1]{\texttt{#1}}
\newcommand{\Rarg}[1]{\texttt{#1}}
\newcommand{\Rlevel}[1]{\texttt{#1}}
%%% other symbols
\newcommand{\file}[1]{\hbox{\rm\texttt{#1}}}
%%\newcommand{\stress}[1]{\index{#1}\textit{#1}}
\newcommand{\stress}[1]{\textit{#1}}
\newcommand{\booktitle}[1]{\textit{#1}} %%'
%%% Math symbols
\usepackage{amstext}
\usepackage{amsmath}
\newcommand{\E}{\mathsf{E}}
\newcommand{\Var}{\mathsf{Var}}
\newcommand{\Cov}{\mathsf{Cov}}
\newcommand{\Cor}{\mathsf{Cor}}
\newcommand{\x}{\mathbf{x}}
\newcommand{\y}{\mathbf{y}}
\renewcommand{\a}{\mathbf{a}}
\newcommand{\W}{\mathbf{W}}
\newcommand{\C}{\mathbf{C}}
\renewcommand{\H}{\mathbf{H}}
\newcommand{\X}{\mathbf{X}}
\newcommand{\B}{\mathbf{B}}
\newcommand{\V}{\mathbf{V}}
\newcommand{\I}{\mathbf{I}}
\newcommand{\D}{\mathbf{D}}
\newcommand{\bS}{\mathbf{S}}
\newcommand{\N}{\mathcal{N}}
\renewcommand{\L}{L}
\renewcommand{\P}{\mathsf{P}}
\newcommand{\K}{\mathbf{K}}
\newcommand{\m}{\mathbf{m}}
\newcommand{\argmin}{\operatorname{argmin}\displaylimits}
\newcommand{\argmax}{\operatorname{argmax}\displaylimits}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\bbeta}{\mathbf{\beta}}
%%% links
\usepackage{hyperref}
\hypersetup{%
pdftitle = {A Handbook of Statistical Analyses Using R (3rd Edition)},
pdfsubject = {Book},
pdfauthor = {Torsten Hothorn and Brian S. Everitt},
colorlinks = {black},
linkcolor = {black},
citecolor = {black},
urlcolor = {black},
hyperindex = {true},
linktocpage = {true},
}
%%% captions & tables
%% : conflics with figure definition in chapman.cls
%%\usepackage[format=hang,margin=10pt,labelfont=bf]{caption}
%%
\usepackage{longtable}
\usepackage[figuresright]{rotating}
%%% R symbol in chapter 1
\usepackage{wrapfig}
%%% Bibliography
\usepackage[round,comma]{natbib}
\renewcommand{\refname}{References \addcontentsline{toc}{chapter}{References}}
\citeindexfalse
%%% texi2dvi complains that \newblock is undefined, hm...
\def\newblock{\hskip .11em plus .33em minus .07em}
%%% Example sections
\newcounter{exercise}[chapter]
\setcounter{exercise}{0}
\newcommand{\exercise}{\stepcounter{exercise} \item{Ex.~\arabic{chapter}.\arabic{exercise} }}
%% URLs
\newcommand{\curl}[1]{\begin{center} \url{#1} \end{center}}
%%% for manual corrections
%\renewcommand{\baselinestretch}{2}
%%% plot sizes
\setkeys{Gin}{width=0.95\textwidth}
%%% color
\usepackage{color}
%%% hyphenations
\hyphenation{drop-out}
\hyphenation{mar-gi-nal}
%%% new bidirectional quotes need
\usepackage[utf8]{inputenc}
%\usepackage{setspace}
\definecolor{sidebox_todo}{rgb}{1,1,0.2}
\newcommand{\todo}[1]{
\hspace{0pt}%
\marginpar{%
\fcolorbox{black}{sidebox_todo}{%
\parbox{\marginparwidth} {
\raggedright\sffamily\footnotesize{TODO: #1}%
}
}%
}
}
\begin{document}
%% Title page
\title{A Handbook of Statistical Analyses Using \R{} --- 3rd Edition}
\author{Torsten Hothorn and Brian S. Everitt}
\maketitle
%%\VignetteIndexEntry{Chapter Analyzing Longitudinal Data II}
%%\VignetteDepends{gee,lme4}
\setcounter{chapter}{13}
\SweaveOpts{prefix.string=figures/HSAUR,eps=FALSE,keep.source=TRUE}
<>=
rm(list = ls())
s <- search()[-1]
s <- s[-match(c("package:base", "package:stats", "package:graphics", "package:grDevices",
"package:utils", "package:datasets", "package:methods", "Autoloads"), s)]
if (length(s) > 0) sapply(s, detach, character.only = TRUE)
if (!file.exists("tables")) dir.create("tables")
if (!file.exists("figures")) dir.create("figures")
set.seed(290875)
options(prompt = "R> ", continue = "+ ",
width = 63, # digits = 4,
show.signif.stars = FALSE,
SweaveHooks = list(leftpar = function()
par(mai = par("mai") * c(1, 1.05, 1, 1)),
bigleftpar = function()
par(mai = par("mai") * c(1, 1.7, 1, 1))))
HSAURpkg <- require("HSAUR3")
if (!HSAURpkg) stop("cannot load package ", sQuote("HSAUR3"))
rm(HSAURpkg)
### hm, R-2.4.0 --vanilla seems to need this
a <- Sys.setlocale("LC_ALL", "C")
###
book <- TRUE
refs <- cbind(c("AItR", "DAGD", "SI", "CI", "ANOVA", "MLR", "GLM",
"DE", "RP", "GAM", "SA", "ALDI", "ALDII", "SIMC", "MA", "PCA",
"MDS", "CA"), 1:18)
ch <- function(x) {
ch <- refs[which(refs[,1] == x),]
if (book) {
return(paste("Chapter~\\\\ref{", ch[1], "}", sep = ""))
} else {
return(paste("Chapter~", ch[2], sep = ""))
}
}
if (file.exists("deparse.R"))
source("deparse.R")
setHook(packageEvent("lattice", "attach"), function(...) {
lattice.options(default.theme =
function()
standard.theme("pdf", color = FALSE))
})
@
\pagestyle{headings}
<>=
book <- FALSE
@
<>=
options(digits = 3)
if (!interactive()) {
print.summary.gee <- function (x, digits = NULL, quote = FALSE, prefix = "", ...)
{
if (is.null(digits))
digits <- options()$digits
else options(digits = digits)
cat("...")
cat("\nModel:\n")
cat(" Link: ", x$model$link, "\n")
cat(" Variance to Mean Relation:", x$model$varfun, "\n")
if (!is.null(x$model$M))
cat(" Correlation Structure: ", x$model$corstr, ", M =",
x$model$M, "\n")
else cat(" Correlation Structure: ", x$model$corstr, "\n")
cat("\n...")
nas <- x$nas
if (!is.null(nas) && any(nas))
cat("\n\nCoefficients: (", sum(nas), " not defined because of singularities)\n",
sep = "")
else cat("\n\nCoefficients:\n")
print(x$coefficients, digits = digits)
cat("\nEstimated Scale Parameter: ", format(round(x$scale,
digits)))
cat("\n...\n")
invisible(x)
}
}
@
\chapter[Analyzing Longitudinal Data II]{
Analyzing Longitudinal Data II -- Generalized Estimation Equations and Linear Mixed Effect Models:
Treating Respiratory Illness and Epileptic Seizures
\label{ALDII}}
\section{Introduction}
\section{Methods for Non-normal Distributions}
\section{Analysis Using \R{}: GEE}
\subsection{Beat the Blues Revisited}
To use the \Rcmd{gee} function, package \Rpackage{gee} \citep{PKG:gee}
has to be installed and attached:
<>=
library("gee")
@
The \Rcmd{gee} function is used in a similar way to the \Rcmd{lme} function
met in \Sexpr{ch("ALDI")} with the addition of the features of the
\Rcmd{glm} function that specify the appropriate error distribution
for the response and the implied link function, and an argument
to specify the structure of the working correlation matrix. Here
we will fit an independence structure and then an exchangeable
structure. The \R{} code for fitting generalized estimation equations to the
\Robject{BtheB\_long} data (as constructed in \Sexpr{ch("ALDI")}) with
identity working correlation matrix is as follows (note that the \Rcmd{gee} function
assumes the rows of the \Rclass{data.frame} \Robject{BtheB\_long} to be
ordered with respect to subjects):
<>=
data("BtheB", package = "HSAUR3")
BtheB$subject <- factor(rownames(BtheB))
nobs <- nrow(BtheB)
BtheB_long <- reshape(BtheB, idvar = "subject",
varying = c("bdi.2m", "bdi.3m", "bdi.5m", "bdi.8m"),
direction = "long")
BtheB_long$time <- rep(c(2, 3, 5, 8), rep(nobs, 4))
names(BtheB_long)[names(BtheB_long) == "treatment"] <- "trt"
@
<>=
osub <- order(as.integer(BtheB_long$subject))
BtheB_long <- BtheB_long[osub,]
btb_gee <- gee(bdi ~ bdi.pre + trt + length + drug,
data = BtheB_long, id = subject, family = gaussian,
corstr = "independence")
@
and with exchangeable correlation matrix:
<>=
btb_gee1 <- gee(bdi ~ bdi.pre + trt + length + drug,
data = BtheB_long, id = subject, family = gaussian,
corstr = "exchangeable")
@
The \Rcmd{summary} method can be used to inspect the fitted models; the
results are shown in Figures~\ref{ALDII-gee-summary} and
\ref{ALDII-gee1-summary}.
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{btb\_gee} model (slightly abbreviated).
\label{ALDII-gee-summary}}
\SchunkLabel
<>=
summary(btb_gee)
@
\SchunkRaw
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{btb\_gee1} model (slightly abbreviated).
\label{ALDII-gee1-summary}}
\SchunkLabel
<>=
summary(btb_gee1)
@
\SchunkRaw
\subsection{Respiratory Illness \label{ALDII:resp}}
The baseline status, i.e., the status for \Robject{month == 0}, will enter
the models as an explanatory variable and thus we have to rearrange the
\Rclass{data.frame} \Robject{respiratory}
in order to create a new variable \Robject{baseline}:
<>=
data("respiratory", package = "HSAUR3")
resp <- subset(respiratory, month > "0")
resp$baseline <- rep(subset(respiratory, month == "0")$status,
rep(4, 111))
resp$nstat <- as.numeric(resp$status == "good")
resp$month <- resp$month[, drop = TRUE]
@
<>=
names(resp)[names(resp) == "treatment"] <- "trt"
levels(resp$trt)[2] <- "trt"
@
The new variable \Robject{nstat} is simply a dummy coding for a poor
respiratory status. Now we can use the data \Robject{resp} to fit a logistic regression model
and GEE models with an independent and an exchangeable correlation structure
as follows.
<>=
resp_glm <- glm(status ~ centre + trt + gender + baseline
+ age, data = resp, family = "binomial")
resp_gee1 <- gee(nstat ~ centre + trt + gender + baseline
+ age, data = resp, family = "binomial", id = subject,
corstr = "independence", scale.fix = TRUE,
scale.value = 1)
resp_gee2 <- gee(nstat ~ centre + trt + gender + baseline
+ age, data = resp, family = "binomial", id = subject,
corstr = "exchangeable", scale.fix = TRUE,
scale.value = 1)
@
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{resp\_glm} model.
\label{ALDII-resp-glm-summary}}
\SchunkLabel
<>=
summary(resp_glm)
@
\SchunkRaw
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{resp\_gee1} model (slightly abbreviated).
\label{ALDII-resp-gee1-summary}}
\SchunkLabel
<>=
summary(resp_gee1)
@
\SchunkRaw
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{resp\_gee2} model (slightly abbreviated).
\label{ALDII-resp-gee2-summary}}
\SchunkLabel
<>=
summary(resp_gee2)
@
\SchunkRaw
The estimated treatment effect taken from the exchangeable
structure GEE model is \Sexpr{round(coef(resp_gee2)["trttrt"], 3)}
which, using the robust standard
errors, has an associated $95\%$ confidence interval
<>=
se <- summary(resp_gee2)$coefficients["trttrt",
"Robust S.E."]
coef(resp_gee2)["trttrt"] +
c(-1, 1) * se * qnorm(0.975)
@
These values reflect effects on the log-odds scale. Interpretation
becomes simpler if we exponentiate the values to get the effects
in terms of odds. This gives a treatment effect of
\Sexpr{round(exp(coef(resp_gee2)["trttrt"]), 3)}
and a $95\%$ confidence interval of
<>=
exp(coef(resp_gee2)["trttrt"] +
c(-1, 1) * se * qnorm(0.975))
@
The odds of achieving a `good' respiratory status with the active treatment is between %'
about twice and seven times the corresponding odds for the placebo.
\subsection{Epilepsy}
Moving on to the count data in \Robject{epilepsy} from
Table~\ref{ALDII-epilepsy-tab}, we begin by calculating
the means and variances of the number of seizures for all
interactions between treatment and period:
<>=
data("epilepsy", package = "HSAUR3")
itp <- interaction(epilepsy$treatment, epilepsy$period)
tapply(epilepsy$seizure.rate, itp, mean)
tapply(epilepsy$seizure.rate, itp, var)
@
Some of the variances are considerably larger than the corresponding means, which for
a Poisson variable may suggest that overdispersion may be a problem, see
\Sexpr{ch("GLM")}.
\begin{figure}
\begin{center}
<>=
layout(matrix(1:2, nrow = 1))
ylim <- range(epilepsy$seizure.rate)
placebo <- subset(epilepsy, treatment == "placebo")
progabide <- subset(epilepsy, treatment == "Progabide")
boxplot(seizure.rate ~ period, data = placebo,
ylab = "Number of seizures",
xlab = "Period", ylim = ylim, main = "Placebo")
boxplot(seizure.rate ~ period, data = progabide,
main = "Progabide", ylab = "Number of seizures",
xlab = "Period", ylim = ylim)
@
\caption{Boxplots of numbers of seizures in each two-week
period post randomization for placebo and active treatments.
\label{ALDII-plot1}}
\end{center}
\end{figure}
\begin{figure}
\begin{center}
<>=
layout(matrix(1:2, nrow = 1))
ylim <- range(log(epilepsy$seizure.rate + 1))
boxplot(log(seizure.rate + 1) ~ period, data = placebo,
main = "Placebo", ylab = "Log number of seizures",
xlab = "Period", ylim = ylim)
boxplot(log(seizure.rate + 1) ~ period, data = progabide,
main = "Progabide", ylab = "Log number of seizures",
xlab = "Period", ylim = ylim)
@
\caption{Boxplots of log of numbers of seizures in
each two-week period post randomization for placebo and active
treatments. \label{ALDII-plot2}}
\end{center}
\end{figure}
We can now fit a Poisson regression model to the data
assuming independence using the \Rcmd{glm} function.
We also use the GEE approach to fit an
independence structure, followed by an exchangeable structure
using the following \R{} code:
<>=
per <- rep(log(2),nrow(epilepsy))
epilepsy$period <- as.numeric(epilepsy$period)
names(epilepsy)[names(epilepsy) == "treatment"] <- "trt"
fm <- seizure.rate ~ base + age + trt + offset(per)
epilepsy_glm <- glm(fm, data = epilepsy, family = "poisson")
epilepsy_gee1 <- gee(fm, data = epilepsy, family = "poisson",
id = subject, corstr = "independence", scale.fix = TRUE,
scale.value = 1)
epilepsy_gee2 <- gee(fm, data = epilepsy, family = "poisson",
id = subject, corstr = "exchangeable", scale.fix = TRUE,
scale.value = 1)
epilepsy_gee3 <- gee(fm, data = epilepsy, family = "poisson",
id = subject, corstr = "exchangeable", scale.fix = FALSE,
scale.value = 1)
@
As usual we inspect the fitted models using the \Rcmd{summary} method, the
results are given in Figures~\ref{ALDII-epilepsy-glm-summary},
\ref{ALDII-epilepsy-gee1-summary}, \ref{ALDII-epilepsy-gee2-summary}, and
\ref{ALDII-epilepsy-gee3-summary}.
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{epilepsy\_glm} model.
\label{ALDII-epilepsy-glm-summary}}
\SchunkLabel
<>=
summary(epilepsy_glm)
@
\SchunkRaw
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{epilepsy\_gee1} model (slightly abbreviated).
\label{ALDII-epilepsy-gee1-summary}}
\SchunkLabel
<>=
summary(epilepsy_gee1)
@
\SchunkRaw
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{epilepsy\_gee2} model (slightly abbreviated).
\label{ALDII-epilepsy-gee2-summary}}
\SchunkLabel
<>=
summary(epilepsy_gee2)
@
\SchunkRaw
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{epilepsy\_gee3} model (slightly abbreviated).
\label{ALDII-epilepsy-gee3-summary}}
\SchunkLabel
<>=
summary(epilepsy_gee3)
@
\SchunkRaw
\section{Analysis Using \R{}: Random Effects}
As an example of using generalized mixed models for the analysis
of longitudinal data with a non-normal response, the following
logistic model will be fitted to the respiratory illness data
\begin{eqnarray*}
\text{logit}(\P(\text{status} = \text{good})) & = &
\beta_0 + \beta_1 \text{treatment} + \beta_2 \text{time} + \beta_3 \text{gender} \\%
& & + \beta_4 \text{age} + \beta_5 \text{centre} +
\beta_6 \text{baseline} + u
\end{eqnarray*}
where $u$ is a subject-specific random effect.
The necessary \R{} code for fitting the model using the \Rcmd{glmer}
function from package \Rpackage{lme4} \citep{PKG:lme4,HSAUR:Bates2005}
is:
<>=
library("lme4")
resp_lmer <- glmer(status ~ baseline + month +
trt + gender + age + centre + (1 | subject),
family = binomial(), data = resp)
exp(fixef(resp_lmer))
@
The significance of the effects as estimated by this random effects
model and by the GEE model described in Section~\ref{ALDII:resp}
is generally similar.
But as expected from our previous discussion the estimated coefficients
are substantially larger. While the estimated effect of treatment
on a randomly sampled individual, given the set of observed covariates,
is estimated by the marginal model using GEE to increase the log-odds
of being disease free by $\Sexpr{round(coef(resp_gee2)["trttrt"], 3)}$,
the corresponding estimate from
the random effects model is
$\Sexpr{round(fixef(resp_lmer)["trttrt"], 3)}$.
These are not inconsistent
results but reflect the fact that the models are estimating
different parameters. The random effects estimate is conditional
upon the patient's random effect, a quantity that is rarely known
in practice. Were we to examine the log-odds of the average predicted
probabilities with and without treatment (averaged over the random
effects) this would give an estimate comparable to that estimated
within the marginal model.
<>=
su <- summary(resp_lmer)
if (!interactive()) {
summary <- function(x) {
cat("\n...\n")
cat("Fixed effects:\n")
lme4V <- packageDescription("lme4")$Version
if (compareVersion("0.999999-2", lme4V) >= 0) {
printCoefmat(su@coefs)
} else {
printCoefmat(su$coefficients)
}
cat("\n...\n")
}
}
@
\renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method
for the \Robject{resp\_lmer} model (abbreviated).
\label{ALDII-resp-lmer-summary}}
\SchunkLabel
<>=
summary(resp_lmer)
@
\SchunkRaw
\clearpage
\bibliographystyle{LaTeXBibTeX/refstyle}
\bibliography{LaTeXBibTeX/HSAUR}
\end{document}