########################################################################## # R code for the sensitivity analysis in case study 1 # some numerical work first, then graphical ########################################################################## # this is how to write functions in R false.positive.rate <- function( alpha, beta, gamma ) { fpr <- ( 1 - alpha ) * ( 1 - gamma ) / ( alpha * beta + ( 1 - alpha ) * ( 1 - gamma ) ) return( fpr ) } ls( ) # [1] "false.positive.rate" # technical note: all 'commands' in R are actually function calls; # when you type a standard R 'command' you're just calling one # of the (many) built-in functions (e.g., 'ls( )' ) # in R under OS windows there's an autocomplete feature: # if you want to refer to an existing object in the # current working directory, you can begin typing its name # and hit the 'Tab' key at any time; starting after the # third character you type, R will autocomplete the name # of the object if you've uniquely specified it with the # first three characters; if not you will need to keep # typing characters and hitting 'Tab' until the object # is uniquely identified # for example, in the function call str( false.positive.rate ) # i only had to type 'str( fal' and hit 'Tab', and R auto-completed # to the full object name 'false.positive.rate'; i still had # to type the closing parenthesis to finish invoking 'str( )', # which yielded # function (alpha, beta, gamma) # - attr(*, "srcref")= 'srcref' int [1:8] 1 24 8 1 24 1 1 8 # ..- attr(*, "srcfile")=Classes 'srcfilecopy', # 'srcfile' # you can ignore the last three lines of the output above # (too far down-in-the-weeds for now); just notice that # R has acknowledged that 'false.positive.rate' is a function # with inputs '(alpha, beta, gamma)' # you can see what R thinks a function looks like just by # typing its name: false.positive.rate # function( alpha, beta, gamma ) { # fpr <- ( 1 - alpha ) * ( 1 - gamma ) / # ( alpha * beta + ( 1 - alpha ) * ( 1 - gamma ) ) # return( fpr ) # } # some comments: # (1) object names cannot begin with numbers in R # you can make almost arbirarily long names by # linking words together with . (period) or _ (underscore) # i like . better because it's in lower case # and doesn't require the use of the shift key, as with _ # you cannot use - (dash) to create a compound name # because - is reserved for subtraction # i like long names because they can be chosen to be # self-documenting (like most of us, i'm lazy about # documenting my code); there's not much ambiguity about # what the function 'false.positive.rate' does in the # context of Case Study 1 # (2) another choice available to you: 'text' and "text" # are treated identically in R, with what's between # the quotes regarded as a character string; i prefer ' # for the same reason as . (it's in lower case) # (3) yet another choice: the assignment operators <- and = # can be used interchangeably; i very much prefer <- , # because = can easily be confused with == , which in R # is the comparison operator for equality: # ( 1 = 2 ) # produces # Error in 1 = 2 : invalid (do_set) left-hand side to assignment # whereas # ( 1 == 2 ) # produces # [1] FALSE # (4) parentheses are needed in R to override the standard order # in which arithmetic operations occur: in the expression # 1 - alpha * ( 1 - gamma ) # # R will first compute 1 - gamma, then multiply the result # by alpha, and then subtract the result from 1; by contrast, # ( 1 - alpha ) * ( 1 - gamma ) # will force R to compute 1 - alpha and 1 - gamma, store # those quantities in temporary places, and then multiply them, # which is what i need in the fpr function above false.negative.rate <- function( alpha, beta, gamma ) { fnr <- alpha * ( 1 - beta ) / ( alpha * ( 1 - beta ) + gamma * ( 1 - alpha ) ) return( fnr ) } # fpr and fnr are vectorized one input at a time, # but not across multiple inputs # this means that any one of their inputs can be a vector # and the output of the function will also be a vector, # but if you give these functions vectors in two or more # of their inputs, what comes out will not be what you want cbind( c( 0.005, 0.01, 0.02 ), c( false.positive.rate( 0.005, 0.999, 0.994 ), false.positive.rate( 0.01, 0.999, 0.994 ), false.positive.rate( 0.02, 0.999, 0.994 ) ) ) # cbind stands for 'column bind' and c for 'combine' # issue the commands help( cbind ) and help( c ) # to get more information on how they work # false positive rate as a function of alpha # alpha fpr # [,1] [,2] # [1,] 0.005 0.5444596 # [2,] 0.010 0.3728814 # [3,] 0.020 0.2273782 abs( ( 0.5444596 - 0.3728814 ) / 0.3728814 ) # [1] 0.4601415 # holding sensitivity and specificity constant, # the false positive rate goes up by 46% when # the prevalence is cut from 1% to 0.5% abs( ( 0.2273782 - 0.3728814 ) / 0.3728814 ) # [1] 0.3902131 # holding sensitivity and specificity constant, # the false positive rate drops by 39% when # the prevalence doubles from 1% to 2% cbind( c( 0.005, 0.01, 0.02 ), c( false.negative.rate( 0.005, 0.999, 0.994 ), false.negative.rate( 0.01, 0.999, 0.994 ), false.negative.rate( 0.02, 0.999, 0.994 ) ) ) # false negative rate as a function of alpha # alpha fnr # [,1] [,2] # [1,] 0.005 5.055433e-06 # [2,] 0.010 1.016188e-05 # [3,] 0.020 2.053093e-05 # the false negative rate remains extremely low # when prevalence is cut in half or doubled ########################################################################## # important warning about writing functions in R # this is still a bug in the latest release of R (3.5.2) f.1 <- function( x, y ) { z <- x + y return( z ) } f.1( 1, 2 ) # [1] 3 f.2 <- function( x, y ) { z <- x + y return( z ) } f.2( 1, 2 ) # [1] 3 f.3 <- function( x, y ) { z <- x + y return( z ) } f.3( 1, 2 ) # [1] 1 # no warning that the code '+ y' was not used! # moral: if you're going to continue a line inside a function # to the next line, which i highly recommend for readability, # you *must* end the first line in such a way that it's # not a valid R command in and of itself # look back at my functions false.positive.rate and # false.negative.rate for valid examples of this ########################################################################## # now let's plot the false positive rate as a function of alpha # use help( plot ) to read about introductory plotting in R # the basic 2-dimensional plot in R is a scatterplot: # with x = ( x.1, ..., x.n ) and y = ( y.1, ..., y.n ), # plot( x, y ) plots the points ( x.1, y.1 ), ..., ( x.n, y.n ) # i want to plot the curve false.positive.rate( alpha, 0.999, 0.994 ) # as alpha ranges from (say) 0 to 0.1 # to do this in R i have to create a grid of alpha values # that spans that domain: alpha.grid <- seq( 0, 0.1, length = 500 ) # read about the sequence command seq with help( seq ); # the command above creates a vector of length 500 # containing equally-spaced values starting with 0 # and ending with 0.1 # if i just plot alpha.grid against the corresponding fpr values, # i'll get a scatterplot with 500 points, but if i use the # type = 'l' # option, R will connect the points together with tiny line segments # and the plot will look like a smooth curve: plot( alpha.grid, false.positive.rate( alpha.grid, 0.999, 0.994 ), type = 'l', xlab = 'alpha (prevalence)', ylab = 'false positive rate', lwd = 2, col = 'red', ylim = c( 0, 1 ), main = '( beta, gamma ) = ( 0.999, 0.994 )' ) # xlab and ylab define labels for the axes; lwd = 2 makes the curve # twice as wide as the default; col = 'red' makes the curve red; # ylim = c( 0, 1 ) creates lower and upper limits of 0 and 1, # respectively, for the y axis; and main gives the plot a title # a good plot always has axes labeled in a meaningful way # and a title that tells the viewer what's going on plot( alpha.grid, false.negative.rate( alpha.grid, 0.999, 0.994 ), type = 'l', xlab = 'alpha (prevalence)', ylab = 'false negative rate', lwd = 2, col = 'red', main = '( beta, gamma ) = ( 0.999, 0.994 )' ) par( mfrow = c( 2, 1 ) ) plot( alpha.grid, false.positive.rate( alpha.grid, 0.999, 0.994 ), type = 'l', xlab = 'alpha (prevalence)', ylab = 'false positive rate', lwd = 2, col = 'red', ylim = c( 0, 1 ), main = '( beta, gamma ) = ( 0.999, 0.994 )' ) plot( alpha.grid, false.negative.rate( alpha.grid, 0.999, 0.994 ), type = 'l', xlab = 'alpha (prevalence)', ylab = 'false negative rate', lwd = 2, col = 'red', main = '( beta, gamma ) = ( 0.999, 0.994 )' ) par( mfrow = c( 1, 1 ) ) pdf( 'case-study-1.pdf' ) par( mfrow = c( 2, 1 ) ) plot( alpha.grid, false.positive.rate( alpha.grid, 0.999, 0.994 ), type = 'l', xlab = 'alpha (prevalence)', ylab = 'false positive rate', lwd = 2, col = 'red', ylim = c( 0, 1 ), main = '( beta, gamma ) = ( 0.999, 0.994 )' ) plot( alpha.grid, false.negative.rate( alpha.grid, 0.999, 0.994 ), type = 'l', xlab = 'alpha (prevalence)', ylab = 'false negative rate', lwd = 2, col = 'red', main = '( beta, gamma ) = ( 0.999, 0.994 )' ) par( mfrow = c( 1, 1 ) ) dev.off( ) ########################################################################## ########################################################################## # maple code for the sensitivity analysis in case study 1 assume( alpha > 0, alpha < 1, beta > 0, beta < 1, Gamma > 0, Gamma < 1 ); fpr := ( alpha, beta, Gamma ) -> ( 1 - alpha ) * ( 1 - Gamma ) / ( alpha * beta + ( 1 - alpha ) * ( 1 - Gamma ) ); simplify( diff( fpr( alpha, beta, Gamma ), alpha ) ); # ( - 1 + Gamma ) * beta / ( ( beta - 1 + Gamma ) * alpha - Gamma + 1 )^2 is( simplify( diff( fpr( alpha, beta, Gamma ), alpha ) ), RealRange( - infinity, 0 ) ); # true fnr := ( alpha, beta, Gamma ) -> alpha * ( 1 - beta ) / ( alpha * ( 1 - beta ) + Gamma * ( 1 - alpha ) ); simplify( diff( fnr( alpha, beta, Gamma ), alpha ) ); # - ( beta - 1 ) * Gamma / ( ( beta - 1 + Gamma ) * alpha - Gamma )^2 is( simplify( diff( fnr( alpha, beta, Gamma ), alpha ) ), RealRange( 0, infinity ) ); # true ########################################################################## ##########################################################################