# rjags MCMC code for the aspirin meta-analysis case study

# unbuffer the output

# if you've already installed JAGS from sourceforge, skip step (0)

# (0) type 'rjags sourceforge' in your favorite browser search window;
#     the top entry should say
#       JAGS: Just Another Gibbs Sampler
#     clicking on that link will take you to sourceforge.net ,
#     where you now click on the field that says
#       Download Latest Version JAGS-4.3.0.exe (33.9 MB)
#     double-left click on the .exe download icon and follow
#     the resulting instructions to install JAGS (i recommend
#     that you accept all default settings offered to you)

# (1) create a new directory; for illustration here, i'm going
#     to call it
#       Aspirin-Meta-Analysis-rjags

# (2) in a text file, write your model and store this text file
#     in the directory Aspirin-Meta-Analysis-rjags ; for illustration
#     here i'll call that file
#       aspirin-meta-analysis-model.txt
#     for clarity, this file looks like the following here
#     (without the # comment symbols, of course):

#     model {
#       mu ~ dunif( -2, 5 )
#       sigma ~ dunif( 0, 6 )
#       for ( i in 1:k ) {
#         theta[ i ] ~ dnorm( mu, tau.theta )
#         y[ i ] ~ dnorm( theta[ i ], tau.y[ i ] )
#       }
#       tau.theta <- 1.0 / ( sigma * sigma )
#       positive.effect <- step( mu )
#     }

# (3) open up an R session and change directory to
#     Aspirin-Meta-Analysis-rjags

# (4) if this is your first time using rjags, you have to install
#     the rjags package and then load it; if you've already
#     installed it you just need to load it now (you have to be
#     connected to the internet for the next step to work).
#     at the R prompt > issue the command

install.packages( 'rjags' )

#     you'll need to select a CRAN mirror site: i use
#       USA (CA 1) [https]
#     R will now mumble a few things and finish with a remark like
#       The downloaded binary packages are in blah-blah
#     to load rjags, at the R prompt > issue the command

library( rjags )

#     R will again mumble a few things, possibly finishing with
#     some warning messages, which you can ignore, saying that
#     various things were built under a different version of R
#     than the one you're using; when you get a > prompt at the end
#     of the mumbling, rjags will be loaded

# (5) now enter the aspirin meta-analysis data set as a list
#     and assign it to an object; here i've called it
#     aspirin.meta.analysis.data :

aspirin.meta.analysis.data <- list( k = 6,
  y = c( 2.77, 2.50, 1.84, 2.56, 2.32, -1.15 ),
  tau.y = c( 0.3673, 0.5827, 0.1826, 0.3586, 0.2551, 1.235 ) )

# (6) now enter initial values for the Markov chain as a list and
#     assign it to an object; here i've called it
#     aspirin.meta.analysis.initial.values :

aspirin.meta.analysis.initial.values <- list( mu = 1.45, sigma = 1.24 )

# (7) now you do model checking and compiling with the following
#     command (using the names i've chosen for illustration above):

aspirin.meta.analysis.run.1 <- jags.model(
  file = "aspirin-meta-analysis-model.txt",
  data = aspirin.meta.analysis.data,
  inits = aspirin.meta.analysis.initial.values )

# (8) before doing the random MCMC sampling, it's good form to set the
#     random number seed, so that other people running your code get
#     the same results you do (note: set.seed controls R's own random
#     number generator; to make the JAGS draws themselves reproducible,
#     see the aside after step (9) below):

set.seed( 123454321 )

# (9) now you do a burn-in of (e.g.) 1000 iterations from your
#     initial values with the following commands:

n.burnin <- 1000

system.time( update( aspirin.meta.analysis.run.1, n.iter = n.burnin ) )

# on my desktop, with an Intel Core i7 3770 at 3.40GHz,
# single-threaded, 1000 burn-in iterations took about 0.3 seconds
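# optional aside (not part of the original script): JAGS uses its own
# random number generators, separate from R's, so set.seed( ) in step (8)
# does not by itself pin down the MCMC output; one way to do that (using
# the documented .RNG.name and .RNG.seed elements of the inits list) would
# be to supply initial values like the following when you build the model
# in step (7), and then re-run steps (7)-(9):

# aspirin.meta.analysis.initial.values <- list( mu = 1.45, sigma = 1.24,
#   .RNG.name = 'base::Mersenne-Twister', .RNG.seed = 123454321 )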
# (10) now you do a monitoring run of (e.g.) 100000 iterations with
#      the following command (at decent GHz this takes only a second
#      or so):

n.monitor <- 100000

system.time( aspirin.meta.analysis.run.1.results <- jags.samples(
  aspirin.meta.analysis.run.1,
  variable.names = c( 'mu', 'sigma', 'theta', 'positive.effect' ),
  n.iter = n.monitor ) )

# on my desktop (single-threaded), 100000 monitoring iterations
# took about 0.96 seconds

# (11) now you get to summarize the posterior in a variety of ways;
#      here's some useful code to do so; first you extract each of
#      the monitored columns of the MCMC data set into individual
#      variables:

mu.star <- aspirin.meta.analysis.run.1.results$mu
sigma.star <- aspirin.meta.analysis.run.1.results$sigma
theta.star <- aspirin.meta.analysis.run.1.results$theta
positive.effect.star <- aspirin.meta.analysis.run.1.results$positive.effect

# then you can create graphical and numerical summaries of each
# monitored quantity, as follows:

##### summary for mu

# (11a) first you create what i call the MCMC 4-plot:

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, mu.star, type = 'l', xlab = 'Iteration Number',
  ylab = 'mu' )

# the upper left graph is a time-series plot of the monitored
# iterations (to visually check for stationarity)

plot( density( mu.star ), xlab = 'mu', main = '', lwd = 2, col = 'red' )

# the upper right graph is a density trace of the marginal posterior
# distribution for the quantity being monitored

acf( as.ts( mu.star[ 1:n.monitor ], start = 1, end = n.monitor,
  frequency = 1 ), lag.max = 10, main = '' )

pacf( as.ts( mu.star[ 1:n.monitor ], start = 1, end = n.monitor,
  frequency = 1 ), lag.max = 10, main = '' )

par( mfrow = c( 1, 1 ) )

# the lower left and right graphs are plots of the autocorrelation
# function (ACF) and the partial autocorrelation function (PACF) for
# the time series of monitored iterations; if these iterations behave
# like an autoregressive time series of order 1 with first-order
# autocorrelation rho (often denoted AR1( rho )), then (a) the PACF
# will have a single spike of height rho at lag 1 and no other
# significant spikes, and (b) the ACF will exhibit a geometric decay
# pattern, with a spike of height rho at lag 1, a spike of approximate
# height rho^2 at lag 2, and so on -- for the parameter mu the ACF and
# PACF plots look just like an AR1 with a first-order autocorrelation
# of about rho.1.hat = 0.6

# an MCMC time series with a rho.1.hat value close to 0 (which would
# be equivalent to IID sampling) is said to be "mixing well"; is mu
# mixing well here? (see take-home test 2)
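# optional aside (not part of the original script): the same AR1
# approximation gives a rough effective sample size for mu -- the
# n.monitor correlated draws carry about as much information about the
# posterior mean as n.monitor * ( 1 - rho ) / ( 1 + rho ) IID draws
# would (the names rho.1.hat.mu and n.eff.mu are new to this aside):

rho.1.hat.mu <- cor( mu.star[ 1:( n.monitor - 1 ) ],
  mu.star[ 2:n.monitor ] )

print( n.eff.mu <- n.monitor * ( 1 - rho.1.hat.mu ) /
  ( 1 + rho.1.hat.mu ) )

# note that n.eff.mu / n.monitor is just 1 over the square of the MCSE
# inflation factor sqrt( ( 1 + rho ) / ( 1 - rho ) ) used in (11b) below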
# (11b) then you can make numerical summaries of the marginal
#       posterior for the monitored quantity:

c( mean( mu.star ), sd( mu.star ),
  quantile( mu.star, probs = c( 0.025, 0.975 ) ) )

#                                  2.5%      97.5%
#  1.5022744  1.0591046 -0.5498263  3.7844192

print( rho.1.hat.mu.star <- cor( mu.star[ 1:( n.monitor - 1 ) ],
  mu.star[ 2:n.monitor ] ) )

# 0.5935416

# the reason for making the ACF and PACF plots is that if the time
# series of monitored iterations for a given quantity behaves like
# an AR1( rho ), then the Monte Carlo standard error (MCSE) of the
# mean of this series is given by the following expression:

print( se.mu.star.mean <- ( sd( mu.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.mu.star ) / ( 1 - rho.1.hat.mu.star ) ) )

# 0.006631511

# this is also the right order of magnitude for the MCSE of the
# MCMC estimates of the posterior SD and of the quantiles giving the
# 95% interval for the monitored quantity

# what do we conclude about the effect of low-dose aspirin
# on patients surviving a first heart attack in the population
# to which it's appropriate to generalize? (see take-home test 2)

##### summary for sigma

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, sigma.star, type = 'l', xlab = 'Iteration Number',
  ylab = 'sigma' )

plot( density( sigma.star ), xlab = 'sigma', main = '', lwd = 2,
  col = 'red' )

acf( as.ts( sigma.star[ 1:n.monitor ], start = 1, end = n.monitor,
  frequency = 1 ), lag.max = 10, main = '' )

pacf( as.ts( sigma.star[ 1:n.monitor ], start = 1, end = n.monitor,
  frequency = 1 ), lag.max = 10, main = '' )

par( mfrow = c( 1, 1 ) )

# the marginal posterior for sigma has a bit of a long right-hand
# tail, as you would expect for a scale parameter; the ACF and PACF
# plots show that the sigma time series behaves more or less like an
# AR1 with a first-order autocorrelation of about 0.78 (see the
# estimate below), so the MCMC output for this parameter is not
# mixing as well as the output for mu

c( mean( sigma.star ), sd( sigma.star ),
  quantile( sigma.star, probs = c( 0.025, 0.975 ) ) )

#                              2.5%    97.5%
# 1.894499 1.079572 0.258231 4.609201

print( rho.1.hat.sigma.star <- cor( sigma.star[ 1:( n.monitor - 1 ) ],
  sigma.star[ 2:n.monitor ] ) )

# [1] 0.7840861

print( se.sigma.star.mean <- ( sd( sigma.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.sigma.star ) / ( 1 - rho.1.hat.sigma.star ) ) )

# [1] 0.009813393

# what do we conclude about the extent of the between-study heterogeneity
# in the aspirin meta-analysis?
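# optional aside (not part of the original script): one concrete way to
# see what the between-study heterogeneity implies is to simulate the
# predictive distribution of the effect theta.new in a new study regarded
# as exchangeable with these k = 6 studies, by drawing
# theta.new ~ N( mu, sigma^2 ) once for each retained ( mu, sigma ) pair
# (the name theta.new.star is new to this aside):

theta.new.star <- rnorm( n.monitor, mean = as.vector( mu.star ),
  sd = as.vector( sigma.star ) )

c( mean( theta.new.star ), sd( theta.new.star ),
  quantile( theta.new.star, probs = c( 0.025, 0.975 ) ) )

plot( density( theta.new.star ), xlab = 'theta.new', main = '', lwd = 2,
  col = 'red' )

# this predictive interval is wider than the posterior interval for mu,
# because it adds the study-to-study variation sigma on top of the
# uncertainty about mu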
##### summary for theta

# let's see what rjags has put into its output object for theta:

str( aspirin.meta.analysis.run.1.results$theta )

# 'mcarray' num [1:6, 1:100000, 1] 5.23 3.83 3.91 3.09 3.47 ...
#  - attr(*, "varname")= chr "theta"
#  - attr(*, "type")= chr "trace"
#  - attr(*, "iterations")= Named num [1:3] 2001 102000 1
#   ..- attr(*, "names")= chr [1:3] "start" "end" "thin"

# rjags has created its own object type, 'mcarray', which here appears
# as if it may behave like a matrix with 6 rows and 100000 columns (this
# is backwards from how we usually visualize the mcmc data set), so we
# reshape it into a 100000 by 6 matrix with one row per iteration:

theta.star.matrix <- matrix( aspirin.meta.analysis.run.1.results$theta,
  100000, 6, byrow = T )

print( theta.star.mean <- apply( theta.star.matrix, 2, mean ) )

# [1]  2.084521  2.044840  1.592835  1.985556  1.811946 -0.424099

print( theta.star.sd <- apply( theta.star.matrix, 2, sd ) )

# [1] 1.315632 1.123722 1.541833 1.314509 1.433813 0.952523

par( mfrow = c( 3, 2 ) )

for ( i in 1:6 ) {

  plot( density( theta.star.matrix[ , i ] ),
    xlab = paste( 'theta.', i, sep = '' ), main = '', lwd = 2,
    col = 'red' )

}

par( mfrow = c( 1, 1 ) )

# how do the bayesian results for the theta[ i ] compare with those
# from maximum likelihood? (see take-home test 2)

##### summary for positive.effect

# as j runs from 1 to 100000, positive.effect[ j ] is 1 if
# mu.star[ j ] > 0 and 0 otherwise, so we can get a monte-carlo estimate
# of the posterior probability that low-dose aspirin would lower
# mortality in the population just by taking the mean of positive.effect:

print( posterior.probability.aspirin.is.beneficial <-
  mean( positive.effect.star ) )

# [1] 0.93224

# what does this imply about appropriate clinical use of low-dose aspirin?
# (see take-home test 2)
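# optional aside (not part of the original script): positive.effect.star
# is just another monitored time series (of 0s and 1s), so the same
# AR1-based MCSE expression used above for mu and sigma tells you how
# accurate the 0.93224 value is as a monte-carlo estimate of
# P( mu > 0 | data ) (the names below are new to this aside):

print( rho.1.hat.positive.effect.star <-
  cor( positive.effect.star[ 1:( n.monitor - 1 ) ],
    positive.effect.star[ 2:n.monitor ] ) )

print( se.positive.effect.star.mean <-
  ( sd( positive.effect.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.positive.effect.star ) /
    ( 1 - rho.1.hat.positive.effect.star ) ) )

# roughly speaking, digits of the 0.93224 estimate that are smaller than
# a couple of these MCSEs should not be taken seriously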