# rjags MCMC code for the aspirin meta-analysis case study

# unbuffer the output

# if you've already installed JAGS from sourceforge, skip step (0)

# (0) type 'rjags sourceforge' in your favorite browser search window;
#     the top entry should say
#       JAGS: Just Another Gibbs Sampler
#     clicking on that link will take you to sourceforge.net ,
#     where you now click on the field that says
#       Download Latest Version JAGS-4.3.0.exe (33.9 MB)
#     double-left click on the .exe download icon and follow
#     the resulting instructions to install JAGS (i recommend
#     that you accept all default settings offered to you)

# (1) create a new directory; for illustration here, i'm going
#     to call it
#       Aspirin-Meta-Analysis-rjags

# (2) in a text file, write your model and store this text file
#     in the directory Aspirin-Meta-Analysis-rjags ; for illustration
#     here i'll call that file
#       aspirin-meta-analysis-model.txt
#     for clarity, this file looks like the following here
#     (without the # comment symbols, of course):

#     model {
#       mu ~ dunif( -2, 5 )
#       sigma ~ dunif( 0, 6 )
#       for ( i in 1:k ) {
#         theta[ i ] ~ dnorm( mu, tau.theta )
#         y[ i ] ~ dnorm( theta[ i ], tau.y[ i ] )
#       }
#       tau.theta <- 1.0 / ( sigma * sigma )
#       positive.effect <- step( mu )
#     }

# (3) open up an R session and change directory to
#     Aspirin-Meta-Analysis-rjags

# (4) if this is your first time using rjags, you have to install
#     the rjags package and then load it; if you've already
#     installed it you just need to load it now (you have to be
#     connected to the internet for the next step to work).
#     at the R prompt > issue the command

install.packages( 'rjags' )

#     you'll need to select a CRAN mirror site: i use
#       USA (CA 1) [https]
#     R will now mumble a few things and finish with a remark like
#       The downloaded binary packages are in blah-blah
#     to load rjags, at the R prompt > issue the command

library( rjags )

#     R will again mumble a few things, possibly finishing with
#     some warning messages, which you can ignore, saying that
#     various things were built under a different version of R
#     than the one you're using; when you get a > prompt at the end
#     of the mumbling, rjags will be loaded

# (5) now enter the aspirin meta-analysis data set as a list
#     and assign it to an object; here i've called it
#     aspirin.meta.analysis.data :

aspirin.meta.analysis.data <- list( k = 6,
  y = c( 2.77, 2.50, 1.84, 2.56, 2.32, -1.15 ),
  tau.y = c( 0.3673, 0.5827, 0.1826, 0.3586, 0.2551, 1.235 ) )

# (6) now enter initial values for the Markov chain as a list and
#     assign it to an object; here i've called it
#     aspirin.meta.analysis.initial.values :

aspirin.meta.analysis.initial.values <- list( mu = 1.45, sigma = 1.24 )

# (7) now you do model checking and compiling with the following
#     command (using the names i've chosen for illustration above):

aspirin.meta.analysis.run.1 <- jags.model(
  file = "aspirin-meta-analysis-model.txt",
  data = aspirin.meta.analysis.data,
  inits = aspirin.meta.analysis.initial.values )

# (8) before doing the random MCMC sampling, it's good form to set the
#     random number seed, so that other people running your code get
#     the same results you do (note: set.seed controls R's own random
#     number generator; to make the JAGS draws themselves reproducible,
#     see the aside after step (9) below):

set.seed( 123454321 )

# (9) now you do a burn-in of (e.g.) 1000 iterations from your
#     initial values with the following commands:

n.burnin <- 1000

system.time( update( aspirin.meta.analysis.run.1, n.iter = n.burnin ) )

# on my desktop, with an Intel Core i7 3770 at 3.40GHz,
# single-threaded, 1000 burn-in iterations took about 0.3 seconds
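# optional aside (not part of the original script): JAGS uses its own
# random number generators, separate from R's, so set.seed( ) in step (8)
# does not by itself pin down the MCMC output; one way to do that (using
# the documented .RNG.name and .RNG.seed elements of the inits list) would
# be to supply initial values like the following when you build the model
# in step (7), and then re-run steps (7)-(9):

# aspirin.meta.analysis.initial.values <- list( mu = 1.45, sigma = 1.24,
#   .RNG.name = 'base::Mersenne-Twister', .RNG.seed = 123454321 )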
# (10) now you do a monitoring run of (e.g.) 100000 iterations with
#      the following command (at decent GHz this takes only a second
#      or so):

n.monitor <- 100000

system.time( aspirin.meta.analysis.run.1.results <- jags.samples(
  aspirin.meta.analysis.run.1,
  variable.names = c( 'mu', 'sigma', 'theta', 'positive.effect' ),
  n.iter = n.monitor ) )

# on my desktop (single-threaded), 100000 monitoring iterations
# took about 0.96 seconds

# (11) now you get to summarize the posterior in a variety of ways;
#      here's some useful code to do so; first you extract each of
#      the monitored columns of the MCMC data set into individual
#      variables:

mu.star <- aspirin.meta.analysis.run.1.results$mu
sigma.star <- aspirin.meta.analysis.run.1.results$sigma
theta.star <- aspirin.meta.analysis.run.1.results$theta
positive.effect.star <- aspirin.meta.analysis.run.1.results$positive.effect

# then you can create graphical and numerical summaries of each
# monitored quantity, as follows:

##### summary for mu

# (11a) first you create what i call the MCMC 4-plot:

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, mu.star, type = 'l', xlab = 'Iteration Number',
  ylab = 'mu' )

# the upper left graph is a time-series plot of the monitored
# iterations (to visually check for stationarity)

plot( density( mu.star ), xlab = 'mu', main = '', lwd = 2, col = 'red' )

# the upper right graph is a density trace of the marginal posterior
# distribution for the quantity being monitored

acf( as.ts( mu.star[ 1:n.monitor ], start = 1, end = n.monitor,
  frequency = 1 ), lag.max = 10, main = '' )

pacf( as.ts( mu.star[ 1:n.monitor ], start = 1, end = n.monitor,
  frequency = 1 ), lag.max = 10, main = '' )

par( mfrow = c( 1, 1 ) )

# the lower left and right graphs are plots of the autocorrelation
# function (ACF) and the partial autocorrelation function (PACF) for
# the time series of monitored iterations; if these iterations behave
# like an autoregressive time series of order 1 with first-order
# autocorrelation rho (often denoted AR1( rho )), then (a) the PACF
# will have a single spike of height rho at lag 1 and no other
# significant spikes, and (b) the ACF will exhibit a geometric decay
# pattern, with a spike of height rho at lag 1, a spike of approximate
# height rho^2 at lag 2, and so on -- for the parameter mu the ACF and
# PACF plots look just like an AR1 with a first-order autocorrelation
# of about rho.1.hat = 0.6

# an MCMC time series with a rho.1.hat value close to 0 (which would
# be equivalent to IID sampling) is said to be "mixing well"; is mu
# mixing well here? (see take-home test 2)
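# optional aside (not part of the original script): the same AR1
# approximation gives a rough effective sample size for mu -- the
# n.monitor correlated draws carry about as much information about the
# posterior mean as n.monitor * ( 1 - rho ) / ( 1 + rho ) IID draws
# would (the names rho.1.hat.mu and n.eff.mu are new to this aside):

rho.1.hat.mu <- cor( mu.star[ 1:( n.monitor - 1 ) ],
  mu.star[ 2:n.monitor ] )

print( n.eff.mu <- n.monitor * ( 1 - rho.1.hat.mu ) /
  ( 1 + rho.1.hat.mu ) )

# note that n.eff.mu / n.monitor is just 1 over the square of the MCSE
# inflation factor sqrt( ( 1 + rho ) / ( 1 - rho ) ) used in (11b) below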
# (11b) then you can make numerical summaries of the marginal
#       posterior for the monitored quantity:

c( mean( mu.star ), sd( mu.star ),
  quantile( mu.star, probs = c( 0.025, 0.975 ) ) )

#                                  2.5%      97.5%
#  1.5022744  1.0591046 -0.5498263  3.7844192

print( rho.1.hat.mu.star <- cor( mu.star[ 1:( n.monitor - 1 ) ],
  mu.star[ 2:n.monitor ] ) )

# 0.5935416

# the reason for making the ACF and PACF plots is that if the time
# series of monitored iterations for a given quantity behaves like
# an AR1( rho ), then the Monte Carlo standard error (MCSE) of the
# mean of this series is given by the following expression:

print( se.mu.star.mean <- ( sd( mu.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.mu.star ) / ( 1 - rho.1.hat.mu.star ) ) )

# 0.006631511

# this is also the right order of magnitude for the MCSE of the
# MCMC estimates of the posterior SD and of the quantiles giving the
# 95% interval for the monitored quantity

# what do we conclude about the effect of low-dose aspirin
# on patients surviving a first heart attack in the population
# to which it's appropriate to generalize? (see take-home test 2)

##### summary for sigma

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, sigma.star, type = 'l', xlab = 'Iteration Number',
  ylab = 'sigma' )

plot( density( sigma.star ), xlab = 'sigma', main = '', lwd = 2,
  col = 'red' )

acf( as.ts( sigma.star[ 1:n.monitor ], start = 1, end = n.monitor,
  frequency = 1 ), lag.max = 10, main = '' )

pacf( as.ts( sigma.star[ 1:n.monitor ], start = 1, end = n.monitor,
  frequency = 1 ), lag.max = 10, main = '' )

par( mfrow = c( 1, 1 ) )

# the marginal posterior for sigma has a bit of a long right-hand
# tail, as you would expect for a scale parameter; the ACF and PACF
# plots show that the sigma time series behaves more or less like an
# AR1 with a first-order autocorrelation of about 0.78 (see the
# estimate below), so the MCMC output for this parameter is not
# mixing as well as the output for mu

c( mean( sigma.star ), sd( sigma.star ),
  quantile( sigma.star, probs = c( 0.025, 0.975 ) ) )

#                              2.5%    97.5%
# 1.894499 1.079572 0.258231 4.609201

print( rho.1.hat.sigma.star <- cor( sigma.star[ 1:( n.monitor - 1 ) ],
  sigma.star[ 2:n.monitor ] ) )

# [1] 0.7840861

print( se.sigma.star.mean <- ( sd( sigma.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.sigma.star ) / ( 1 - rho.1.hat.sigma.star ) ) )

# [1] 0.009813393

# what do we conclude about the extent of the between-study heterogeneity
# in the aspirin meta-analysis?
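# optional aside (not part of the original script): one concrete way to
# see what the between-study heterogeneity implies is to simulate the
# predictive distribution of the effect theta.new in a new study regarded
# as exchangeable with these k = 6 studies, by drawing
# theta.new ~ N( mu, sigma^2 ) once for each retained ( mu, sigma ) pair
# (the name theta.new.star is new to this aside):

theta.new.star <- rnorm( n.monitor, mean = as.vector( mu.star ),
  sd = as.vector( sigma.star ) )

c( mean( theta.new.star ), sd( theta.new.star ),
  quantile( theta.new.star, probs = c( 0.025, 0.975 ) ) )

plot( density( theta.new.star ), xlab = 'theta.new', main = '', lwd = 2,
  col = 'red' )

# this predictive interval is wider than the posterior interval for mu,
# because it adds the study-to-study variation sigma on top of the
# uncertainty about mu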
##### summary for theta

# let's see what rjags has put into its output object for theta:

str( aspirin.meta.analysis.run.1.results$theta )

# 'mcarray' num [1:6, 1:100000, 1] 5.23 3.83 3.91 3.09 3.47 ...
#  - attr(*, "varname")= chr "theta"
#  - attr(*, "type")= chr "trace"
#  - attr(*, "iterations")= Named num [1:3] 2001 102000 1
#   ..- attr(*, "names")= chr [1:3] "start" "end" "thin"

# rjags has created its own object type, 'mcarray', which here appears
# as if it may behave like a matrix with 6 rows and 100000 columns (this
# is backwards from how we usually visualize the mcmc data set), so we
# reshape it into a 100000 by 6 matrix with one row per iteration:

theta.star.matrix <- matrix( aspirin.meta.analysis.run.1.results$theta,
  100000, 6, byrow = T )

print( theta.star.mean <- apply( theta.star.matrix, 2, mean ) )

# [1]  2.084521  2.044840  1.592835  1.985556  1.811946 -0.424099

print( theta.star.sd <- apply( theta.star.matrix, 2, sd ) )

# [1] 1.315632 1.123722 1.541833 1.314509 1.433813 0.952523

par( mfrow = c( 3, 2 ) )

for ( i in 1:6 ) {

  plot( density( theta.star.matrix[ , i ] ),
    xlab = paste( 'theta.', i, sep = '' ), main = '', lwd = 2,
    col = 'red' )

}

par( mfrow = c( 1, 1 ) )

# how do the bayesian results for the theta[ i ] compare with those
# from maximum likelihood? (see take-home test 2)

##### summary for positive.effect

# as j runs from 1 to 100000, positive.effect[ j ] is 1 if
# mu.star[ j ] > 0 and 0 otherwise, so we can get a monte-carlo estimate
# of the posterior probability that low-dose aspirin would lower
# mortality in the population just by taking the mean of positive.effect:

print( posterior.probability.aspirin.is.beneficial <-
  mean( positive.effect.star ) )

# [1] 0.93224

# what does this imply about appropriate clinical use of low-dose aspirin?
# (see take-home test 2)
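# optional aside (not part of the original script): positive.effect.star
# is just another monitored time series (of 0s and 1s), so the same
# AR1-based MCSE expression used above for mu and sigma tells you how
# accurate the 0.93224 value is as a monte-carlo estimate of
# P( mu > 0 | data ) (the names below are new to this aside):

print( rho.1.hat.positive.effect.star <-
  cor( positive.effect.star[ 1:( n.monitor - 1 ) ],
    positive.effect.star[ 2:n.monitor ] ) )

print( se.positive.effect.star.mean <-
  ( sd( positive.effect.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.positive.effect.star ) /
    ( 1 - rho.1.hat.positive.effect.star ) ) )

# roughly speaking, digits of the 0.93224 estimate that are smaller than
# a couple of these MCSEs should not be taken seriously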