# We are going to step through Jake's coin flip example to get a sense of what is involved # in doing Bayesian inference. # Here is the likelihood function for a binomial distribution, with N trials and h heads: likelihood <- function (N, h, theta) theta^h * (1 - theta)^(N-h) # Check that the likelihood function makes sense t <- (0:100) / 100 png ("figure1.png", width=800, height=600) par (mfrow=c(2,2)) par (bty='n') par (col='red') plot (t, likelihood(100, 50, t), type='l', xlab='Theta Hat', ylab="Likelihood", main='Likelihood (t=0.5)') # Great, the maximum likelihood for 50 heads from 100 flips is a theta of 0.5. prior <- function (theta, a, b) dbeta (theta, a, b) # Lets have a look at our initial prior a <- 2 b <- 2 plot (t, prior(t, a, b), type ='l', xlab='Theta Hat', ylab='Pr(theta|a,b)', main='Prior') # Using the analytic form of the posterior: evidence <- function (N, h, a, b) beta(h + a, N - h + b) / beta (a, b) posterior <- function (theta, N, h, a, b) likelihood (N, h, theta) * prior(theta, a, b) / evidence (N, h, a, b) plot (t, posterior(t, 100, 50, a, b), type ='l', xlab='Theta Hat', ylab='Pr(theta|Observations,a,b)', main='Posterior (t=0.5)') plot (t, posterior(t, 100, 70, a, b), type ='l', xlab='Theta Hat', ylab='Pr(theta|Observations,a,b)', main='Posterior (t=0.7)') dev.off() # Let's say we don't know what the analytic form for the evidence() is, and replace it by a numerical integration # over all possible theta's from 0 to 1: evidenceN <- function (N, h, a, b) integrate (function(t) likelihood (N,h,t) * prior (t,a,b), 0, 1)$value posteriorN <- function (theta, N, h, a, b) likelihood (N, h, theta) * prior(theta, a, b) / evidenceN (N, h, a, b) N <- 100 # Trials h <- 70 # Heads png ("figure2.png", width=800, height=600) par (mfrow=c(1,1)) par (bty='n') analytic <- posterior (t, N, h, a, b) estimated <- posteriorN(t, N, h, a, b) plot (t, analytic, type ='l', xlab='Theta Hat', ylab='Pr(theta|Observations,a,b)', main='Posterior (t=0.7)', col='red', lty=1) lines (t, estimated, type ='l', xlab='Theta Hat', ylab='Pr(theta|Observations,a,b)', col='blue', lty=2) err <- (analytic - estimated)^2 lines (t, (err - min(err)) / diff(range(err)) * max(analytic), lty=3, col='black') legend (0,2, c('Analytic','Estimated','Error^2 (scaled)'), col=c('red','blue','black'), lty=c(1,2,3), bty='n', text.col='black') dev.off() # While things are pretty simple with this toy example, Jake made the point that real difficulty with Bayesian inference # is twofold: # 1. Integrating across theta to find the evidence (denominator) # 2. Once you have the posterior, integrating it to calculate summary statistics (mean, variance, etc.) # In the above example we used the integrate() function to apply adaptive quadrature to find the evidence. # We could use this method for 2, but lets not. Instead, let us use MCMC - which is at its core, a way # to draw samples from a distribution that is otherwise hard to sample from. # Given that this example is rather trivial, with just one parameter in question (theta), I won't step # through the implementations of vanilla Monte Carlo methods (uniform, importance & rejection sampling) # These implementations are pretty much straight forward from Jake's presentation. # I will however, implement a simple Metropolis-Hastings MCMC sampler using a simple and symmetric # Gaussian proposal density (q in Jake's notes). MHstep <- function (pdf, prevCandidate) { newCandidate <- prevCandidate + rnorm (1, mean=0, sd=0.1) # Effectively we are taking a random walk. a <- pdf(newCandidate) / pdf(prevCandidate) # NB: Because we are using the normal distribution # as our proposal density, which is symmetrical, # we cancel out the q terms on the numerator and # denominator, as q(x|y) = q(y|x) u <- runif(1) # Draw a uniform random number from 0 to 1 if (a > u) { # This candidate is likely to be a better sample return (newCandidate) } # Else, stick with our previous candidate return (prevCandidate) } # Let's use our numerical approximate to the actual posterior function as the PDF we want # to draw samples from: posteriorPDF <- function (t) posteriorN (t, N, h, a, b) # Time to go on a random walk down coin flip street. steps <- 1000 samples <- matrix(NA, steps) samples[1] <- 0.5 # initial guess for (i in 2:steps) { samples[i] <- MHstep (posteriorPDF, samples[i-1]) } # And how did we do? png ("figure3.png", width=800, height=600) par(bty='n') par(col='red') plot(cumsum(samples)/1:steps, type='l', xlab='Step', ylab='Estimated Mean', main='Drawing samples by Metropolis-Hastings') dev.off()