# using 'optim' to maximize the log-likelihood function,
# to get the MLEs and their estimated standard errors,
# in take-home test 2, problem 2(B)

# unbuffer the output

# enter the data:

y <- c( 2.77, 2.50, 1.84, 2.56, 2.32, -1.15 )

V <- c( 1.65, 1.31, 2.34, 1.67, 1.98, 0.90 )^2

# 'optim' has two quirks: (1) in the R function you write
# for optim to optimize (here our function will compute
# the log-likelihood), the inputs to the function (for us
# these inputs are mu and sigma) must be packed into a single vector
# (here this will be eta = ( mu, sigma )), and (2) the
# default optimization operation for 'optim' is *minimization*,
# not maximization

# but that's easy to get around: minimizing a function f
# is equivalent to maximizing - f

# to satisfy quirk (1), I need to rewrite the log-likelihood function
# that we used in the plots, so that its inputs are ( eta, y, V ),
# and then inside the new function I need to specify that
# mu is eta[ 1 ] and sigma is eta[ 2 ]; and to satisfy quirk (2) we'll
# invoke optim with a control parameter that makes it
# minimize ( - 1 ) times the log-likelihood function

aspirin.mortality.log.likelihood.for.optim <- function( eta, y, V ) {

  mu <- eta[ 1 ]
  sigma <- eta[ 2 ]

# log-likelihood (up to an additive constant) for the model
# ( y_i | mu, sigma ) ~ independent N( mu, V_i + sigma^2 ):

  ll <- ( - 1 / 2 ) * sum( log( V + sigma^2 ) +
    ( y - mu )^2 / ( V + sigma^2 ) )

  return( ll )

}

# optim conducts an iterative search (for us this will be
# in ( mu, sigma ) space) for the values of mu and sigma
# that minimize - ll; it requires us to supply
# initial (starting) values for this search

# from the plots it looks like the values of mu and sigma
# that maximize the log-likelihood function are about 1.5 and 1.25,
# respectively, so let's use those as initial values:

eta.initial.values <- c( 1.5, 1.25 )

print( ml.results.1 <- optim( eta.initial.values,
  aspirin.mortality.log.likelihood.for.optim, y = y, V = V,
  hessian = TRUE, control = list( fnscale = -1 ) ) )

# check that the output $convergence is 0; this indicates
# successful optimization

# the output $par gives the MLE vector

# the output $value is the value of the log-likelihood function
# at its maximum

# the output $counts tells you how many function and gradient evaluations
# optim needed to find the maximum to within a default relative tolerance
# of about 10^( -8 ) on (in our case) the log-likelihood scale
# (the gradient count will be NA here, because the default
# Nelder-Mead method doesn't use gradients)

# we know from class that the estimated covariance matrix of the vector
# of MLEs is the inverse of the negative of the hessian of the
# log-likelihood function, evaluated at the maximum; optim reports this
# hessian in the output $hessian, so all we have to do is multiply it
# by - 1 and invert the result

# R folds two tasks into one built-in function: the function call
# 'solve( A, B )' solves the linear system A X = B, and if B is missing
# from the function call R takes B to be the identity matrix, so that
# the X returned by 'solve' is then the inverse of A

print( maximum.likelihood.covariance.matrix <-
  solve( - ml.results.1$hessian ) )

# how can the estimated standard errors of mu.hat.mle and sigma.hat.mle
# be computed from this matrix?

# complete the command

# print( maximum.likelihood.estimated.standard.errors <- ... )

# to get the standard errors; hint: the R built-in function call
# 'diag( X )' returns the diagonal of the matrix X
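
# some optional sanity checks and illustrations follow (these are
# editorial additions, not part of the test problem); first, the
# log-likelihood at the starting values should be finite, and no larger
# than the maximum that optim reported in $value:

print( aspirin.mortality.log.likelihood.for.optim( eta.initial.values, y, V ) )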
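
# second, rerunning optim from a different (arbitrarily chosen)
# starting point should land on essentially the same MLE vector,
# if the first search really did converge:

print( optim( c( 1.0, 1.0 ), aspirin.mortality.log.likelihood.for.optim,
  y = y, V = V, control = list( fnscale = -1 ) )$par )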
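
# third, a small illustration of the 'solve' behavior described above,
# using a made-up 2 x 2 matrix:

A <- matrix( c( 2, 1, 1, 3 ), nrow = 2 )

print( A.inverse <- solve( A ) ) # with B omitted, 'solve' returns the inverse of A

print( A %*% A.inverse ) # this should be the 2 x 2 identity matrix, up to rounding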
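
# and finally, staying with the made-up matrix: if A.inverse happened to
# be a covariance matrix, the standard deviations of the two quantities
# it describes would be the square roots of its diagonal entries:

print( sqrt( diag( A.inverse ) ) )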