A guided walk through the Metropolis algorithm

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

A guided walk through the Metropolis algorithm

The following vignette contains code to accompany the paper “Markov unchained: a guided walk through the Metropolis algorithm.” The code walks the reader through:

Reading in the data
Finding maximum likelihood estimates of the log odds ratio and risk difference
Simulating posterior distributions of the log odds ratio and risk difference using the following algorithms (described in the paper)
- Random walk metropolis
- Guided metropolis
- Guided, adaptive metropolis
- Guided, adaptive metropolis with normal priors
Using the “metropolis” R package to carry out the same analysis, simulating a posterior distribution for a logistic model under uniform or normal priors
Note: each algorithm used here runs for only 10,000 iterations (to save computational time when installing the package). The results will vary more than they would with the larger number of iterations used in the paper.

The data

A case-control study of leukemia (y=1) and residence around strong magnetic fields (x=1)

# Outcome indicator: 36 entries equal to 1 (leukemia cases) followed by 198 zeros
y <- rep(c(1, 0), times = c(36, 198))
# Exposure indicator, aligned with y: among the 36 cases, 3 ones then 33 zeros;
# among the remaining 198 subjects, 5 ones then 193 zeros
x <- rep(c(1, 0, 1, 0), times = c(3, 33, 5, 193))

Helper functions

These functions will be used throughout

#helper functions
# Inverse logit: maps a log-odds value (or vector of values) `mu` to 1 / (1 + exp(-mu)).
expit <- function(mu) {
  denom <- 1 + exp(-mu)
  1 / denom
}

# Bernoulli log likelihood of the logistic model with success probability
# expit(beta[1] + x * beta[2]).
#   y    : vector of 0/1 outcomes
#   x    : vector of exposure values, same length as y
#   beta : coefficients; beta[1] is the intercept, beta[2] multiplies x
# Returns the log likelihood summed over all observations.
loglik <- function(y, x, beta) {
  linpred <- beta[1] + x * beta[2]
  sum(dbinom(y, size = 1, prob = expit(linpred), log = TRUE))
}

# Risk difference implied by a pair of logistic coefficients.
# The intercept is shifted by log(pop_odds / study_odds) before converting to
# risks, where pop_odds is the odds for the hard-coded risk 4.8/100000
# (presumably the population risk of the outcome -- confirm against the paper)
# and study_odds is the odds of mean(y).
#   y    : vector of 0/1 outcomes (only its mean is used)
#   x    : not used in the computation
#   beta : coefficients; beta[1] is the intercept, beta[2] the exposure effect
# Returns expit(offset + beta[1] + beta[2]) - expit(offset + beta[1]).
riskdifference <- function(y, x, beta) {
  pop_risk <- 4.8 / 100000
  pop_odds <- pop_risk / (1 - pop_risk)
  p_outcome <- mean(y)
  study_odds <- p_outcome / (1 - p_outcome)
  # offset applied to the intercept
  offset <- log(pop_odds / study_odds)
  risk_exposed <- expit(offset + beta[1] + beta[2])
  risk_unexposed <- expit(offset + beta[1])
  mean(risk_exposed - risk_unexposed)
}

Maximum likelihood estimates

# Fit the logistic regression of leukemia on magnetic field exposure with glm()
data <- data.frame(leuk = y, magfield = x)
mod <- glm(leuk ~ magfield, family = binomial(), data = data)
coef_table <- summary(mod)$coefficients
coef_table

# Estimate and standard error of the exposure coefficient (row 2 of the table)
beta1 <- coef_table[2, 1]
se1 <- coef_table[2, 2]
# Point estimate with normal-approximation 95% limits on the coefficient scale
wald_ci <- c(
  beta = beta1,
  ll = beta1 + se1 * qnorm(0.025),
  ul = beta1 + se1 * qnorm(0.975)
)
cat("\n\nMaximum likelihood beta coefficient (95% CI)\n")
round(wald_ci, 2)

# Exponentiate to report the same quantities on the odds ratio scale
cat("\n\nMaximum likelihood odds ratio (95% CI)\n")
round(exp(wald_ci), 2)

cat("\n\nMaximum likelihood risk difference (multiplied by 1000) \n")
round(c(rd_1000 = riskdifference(y, x, mod$coefficients) * 1000), 2)

##              Estimate Std. Error   z value     Pr(>|z|)
## (Intercept) -1.766183   0.188373 -9.375988 6.853094e-21
## magfield     1.255357   0.754200  1.664488 9.601492e-02
## 
## 
## Maximum likelihood beta coefficient (95% CI)
##  beta    ll    ul 
##  1.26 -0.22  2.73 
## 
## 
## Maximum likelihood odds ratio (95% CI)
##  beta    ll    ul 
##  3.51  0.80 15.39 
## 
## 
## Maximum likelihood risk difference (multiplied by 1000) 
## rd_1000 
##    0.11

Random walk metropolis

# Random walk Metropolis sampler for (beta0, beta1).
# The acceptance ratio uses the likelihood only (no prior term).
# Results:
#   beta_post : M x 2 matrix of draws (columns beta0, beta1)
#   accept    : 0/1 indicator of whether each proposal was accepted
#   rd        : risk difference multiplied by 1000 at each draw

# initialize
M <- 10000
set.seed(91828)
beta_post <- matrix(nrow = M, ncol = 2)
colnames(beta_post) <- c("beta0", "beta1")
accept <- numeric(M)
rd <- numeric(M)
beta_post[1, ] <- c(2, -3)
# Store the initial risk difference on the same x1000 scale as every later
# iteration (previously the first element was left unscaled).
rd[1] <- 1000 * riskdifference(y, x, beta_post[1, ])
accept[1] <- 1
for (i in 2:M) {
  oldb <- beta_post[i - 1, ]
  # symmetric proposal: independent normal steps for both coefficients
  prop <- rnorm(2, sd = 0.2)
  newb <- oldb + prop
  num <- loglik(y, x, newb)
  den <- loglik(y, x, oldb)
  # likelihood ratio of the proposed vs. current value
  acceptprob <- exp(num - den)
  acc <- (acceptprob > runif(1))
  if (acc) {
    beta_post[i, ] <- newb
    accept[i] <- 1
  } else {
    # rejected: the chain stays at the current value
    beta_post[i, ] <- oldb
    accept[i] <- 0
  }
  rd[i] <- 1000 * riskdifference(y, x, beta_post[i, ])
}

Inspecting output

mean(accept)

## [1] 0.6551

summary(beta_post)

##      beta0            beta1        
##  Min.   :-2.518   Min.   :-3.9483  
##  1st Qu.:-1.902   1st Qu.: 0.7389  
##  Median :-1.776   Median : 1.2292  
##  Mean   :-1.770   Mean   : 1.1714  
##  3rd Qu.:-1.651   3rd Qu.: 1.7004  
##  Max.   : 2.000   Max.   : 3.9189

# Starting value of the chain, and column means after dropping the first 1000 draws
init <- beta_post[1, ]
postmean <- apply(beta_post[-(1:1000), ], MARGIN = 2, FUN = mean)
cat("Posterior mean\n")

## Posterior mean

round(postmean, 2)

## beta0 beta1 
## -1.78  1.22

# Scatter plot of all draws (semi-transparent), with the initial value and
# the posterior mean highlighted
plot(
  beta_post,
  pch = 19,
  col = rgb(0, 0, 0, 0.05),
  xlab = expression(beta[0]),
  ylab = expression(beta[1]),
  xlim = c(-2.5, 2.5),
  ylim = c(-4.5, 4.5)
)
points(init[1], init[2], col = "red", pch = 19)
points(postmean[1], postmean[2], col = "orange", pch = 19)
legend(
  "topright",
  col = c("red", "orange"),
  legend = c("Initial value", "Post. mean"),
  pch = 19
)

plot of chunk Inspecting

# Trace plot of the beta1 draws across all iterations
plot(
  beta_post[, 2],
  type = "l",
  ylab = expression(beta[1]),
  xlab = "Iteration",
  ylim = c(-4, 4)
)

plot of chunk Inspecting

# Trace plot of the stored risk difference values across all iterations
plot(
  rd,
  type = "l",
  ylab = "RD*1000",
  xlab = "Iteration",
  ylim = c(-4, 4)
)

plot of chunk Inspecting

# Kernel density of the beta1 draws, excluding the first 1000 iterations
plot(
  density(beta_post[-(1:1000), 2]),
  xlab = expression(beta[1]),
  ylab = "Density",
  main = ""
)

plot of chunk Inspecting

# Kernel density of the stored risk difference values, excluding the first 1000 iterations
plot(
  density(rd[-(1:1000)]),
  xlab = "RD*1000",
  ylab = "Density",
  main = ""
)

plot of chunk Inspecting

Guided metropolis

# Guided Metropolis sampler for (beta0, beta1).
# Each proposal moves both coefficients in the current direction `dir`
# (dir times the absolute value of a normal draw); the direction is reversed
# whenever a proposal is rejected. The acceptance ratio uses the likelihood
# only (no prior term).
# Results:
#   beta_post_guide : M x 2 matrix of draws (columns beta0, beta1)
#   accept          : 0/1 indicator of whether each proposal was accepted
#   rd_guide        : risk difference multiplied by 1000 at each draw

# initialize
M <- 10000
set.seed(91828)
beta_post_guide <- matrix(nrow = M, ncol = 2)
colnames(beta_post_guide) <- c("beta0", "beta1")
accept <- numeric(M)
rd_guide <- numeric(M)
beta_post_guide[1, ] <- c(2, -3)
# Store the initial risk difference on the same x1000 scale as every later
# iteration (previously the first element was left unscaled).
rd_guide[1] <- 1000 * riskdifference(y, x, beta_post_guide[1, ])
accept[1] <- 1
dir <- 1
for (i in 2:M) {
  oldb <- beta_post_guide[i - 1, ]
  # step sizes are half-normal; the sign comes from the current direction
  prop <- dir * abs(rnorm(2, sd = 0.2))
  newb <- oldb + prop
  num <- loglik(y, x, newb)
  den <- loglik(y, x, oldb)
  acceptprob <- exp(num - den)
  acc <- (acceptprob > runif(1))
  if (acc) {
    beta_post_guide[i, ] <- newb
    accept[i] <- 1
  } else {
    beta_post_guide[i, ] <- oldb
    accept[i] <- 0
    # rejection: reverse the direction of travel
    dir <- -dir
  }
  rd_guide[i] <- 1000 * riskdifference(y, x, beta_post_guide[i, ])
}

# Column means of the guided chain after dropping the first 1000 draws
postmean <- apply(beta_post_guide[-(1:1000), ], MARGIN = 2, FUN = mean)
cat("Posterior mean, guided\n")

## Posterior mean, guided

round(postmean, 2)

## beta0 beta1 
## -1.80  1.41

Contrasting output with random walk

# Semi-transparent colors: col1 for the random walk chain, col2 for the guided chain
col1 <- rgb(0, 0, 0, .5)
col2 <- rgb(1, 0, 0, .35)
# two panels side by side
par(mfcol = c(1, 2))

# trace plots of beta1: first 200 iterations
plot(
  beta_post[1:200, 2],
  type = "l",
  ylab = expression(beta[1]),
  xlab = "Iteration",
  ylim = c(-4, 4),
  col = col1
)
lines(beta_post_guide[1:200, 2], col = col2)
legend("topright", lty = 1, col = c(col1, col2), legend = c("Rand. walk", "Guided"))
# trace plots of beta1: iterations 9800 to 10000
plot(
  9800:10000,
  beta_post[9800:10000, 2],
  type = "l",
  ylab = expression(beta[1]),
  xlab = "Iteration",
  ylim = c(-4, 4),
  col = col1
)
lines(9800:10000, beta_post_guide[9800:10000, 2], col = col2)
legend("topright", lty = 1, col = c(col1, col2), legend = c("Rand. walk", "Guided"))

plot of chunk Comparing guided

# density plots of beta1, first 1000 iterations of each chain dropped
plot(
  density(beta_post_guide[-(1:1000), 2]),
  col = col2,
  xlab = expression(beta[1]),
  ylab = "Density",
  main = ""
)
lines(density(beta_post[-(1:1000), 2]), col = col1)
legend("bottomright", lty = 1, col = c(col1, col2), legend = c("Rand. walk", "Guided"))


# density plots of RD*1000, first 1000 iterations of each chain dropped
plot(
  density(rd_guide[-(1:1000)]),
  xlab = "RD*1000",
  ylab = "Density",
  main = "",
  col = col2
)
lines(density(rd[-(1:1000)]), col = col1)
legend("topright", lty = 1, col = c(col1, col2), legend = c("Rand. walk", "Guided"))

plot of chunk Comparing guided

par(mfcol=c(1,1))

Guided, adaptive metropolis algorithm

# Guided, adaptive Metropolis sampler for (beta0, beta1).
# Runs M + burnin iterations. For iterations 26 to burnin - 1 the proposal
# standard deviations are reset to the column standard deviations of up to the
# 100 most recent draws; afterwards they stay fixed at their last value.
# Proposals move in the current direction `dir`, which is reversed after each
# rejection. The acceptance ratio uses the likelihood only (no prior term).
# Results:
#   beta_post_adaptguide : (M + burnin) x 2 matrix of draws (columns beta0, beta1)
#   accept               : 0/1 indicator of whether each proposal was accepted
#   rd_adaptguide        : risk difference multiplied by 1000 at each draw

# initialize
M <- 10000
burnin <- 1000
set.seed(91828)
beta_post_adaptguide <- matrix(nrow = M + burnin, ncol = 2)
colnames(beta_post_adaptguide) <- c("beta0", "beta1")
accept <- numeric(M + burnin)
rd_adaptguide <- numeric(M + burnin)
beta_post_adaptguide[1, ] <- c(2, -3)
# Initial risk difference: computed from this chain's own starting value
# (not from another chain's matrix) and on the same x1000 scale as the loop.
rd_adaptguide[1] <- 1000 * riskdifference(y, x, beta_post_adaptguide[1, ])
accept[1] <- 1
prop.sigma <- c(0.2, 0.2)
dir <- 1
for (i in 2:(M + burnin)) {
  # scalar condition, so use the short-circuit operator
  if (i > 25 && i < burnin) {
    # adapt the proposal scale from the most recent draws (window of up to 100)
    prop.sigma <- apply(beta_post_adaptguide[max(1, i - 100):(i - 1), ], 2, sd)
  }
  oldb <- beta_post_adaptguide[i - 1, ]
  prop <- dir * abs(rnorm(2, sd = prop.sigma))
  newb <- oldb + prop
  num <- loglik(y, x, newb)
  den <- loglik(y, x, oldb)
  acceptprob <- exp(num - den)
  acc <- (acceptprob > runif(1))
  if (acc) {
    beta_post_adaptguide[i, ] <- newb
    accept[i] <- 1
  } else {
    beta_post_adaptguide[i, ] <- oldb
    accept[i] <- 0
    # rejection: reverse the direction of travel
    dir <- -dir
  }
  rd_adaptguide[i] <- 1000 * riskdifference(y, x, beta_post_adaptguide[i, ])
}
# Posterior means after discarding the burn-in iterations
postmean <- apply(beta_post_adaptguide[-seq_len(burnin), ], 2, mean)
cat("Posterior mean, guided and adaptive\n")

## Posterior mean, guided and adaptive

round(postmean, 2)

## beta0 beta1 
## -1.78  1.22

Contrasting output

# Semi-transparent colors: col1 for the random walk chain, col2 for the guided, adaptive chain
col1 <- rgb(0, 0, 0, .5)
col2 <- rgb(1, 0, 0, .35)
# two panels side by side
par(mfcol = c(1, 2))

# trace plots of beta1: first 200 iterations
plot(
  beta_post[1:200, 2],
  type = "l",
  ylab = expression(beta[1]),
  xlab = "Iteration",
  ylim = c(-4, 4),
  col = col1
)
lines(beta_post_adaptguide[1:200, 2], col = col2)
legend("topright", lty = 1, col = c(col1, col2), legend = c("Rand. walk", "Guided, adaptive"))
# trace plots of beta1: iterations 9800 to 10000
plot(
  9800:10000,
  beta_post[9800:10000, 2],
  type = "l",
  ylab = expression(beta[1]),
  xlab = "Iteration",
  ylim = c(-4, 4),
  col = col1
)
lines(9800:10000, beta_post_adaptguide[9800:10000, 2], col = col2)
legend("topright", lty = 1, col = c(col1, col2), legend = c("Rand. walk", "Guided, adaptive"))

plot of chunk Comparing 2

# density plots of beta1, first 1000 iterations of each chain dropped
plot(
  density(beta_post_adaptguide[-(1:1000), 2]),
  col = col2,
  xlab = expression(beta[1]),
  ylab = "Density",
  main = ""
)
lines(density(beta_post[-(1:1000), 2]), col = col1)
legend("bottomright", lty = 1, col = c(col1, col2), legend = c("Rand. walk", "Guided, adaptive"))


# density plots of RD*1000, first 1000 iterations of each chain dropped
plot(
  density(rd_adaptguide[-(1:1000)]),
  xlab = "RD*1000",
  ylab = "Density",
  main = "",
  col = col2
)
lines(density(rd[-(1:1000)]), col = col1)
legend("topright", lty = 1, col = c(col1, col2), legend = c("Rand. walk", "Guided, adaptive"))

plot of chunk Comparing 2

par(mfcol=c(1,1))

Guided, adaptive metropolis algorithm using normal priors

# Guided, adaptive Metropolis sampler for (beta0, beta1) with normal priors:
# beta0 ~ N(0, variance 100) and beta1 ~ N(0, variance 0.5). The log prior
# densities are added to the log likelihood in the acceptance ratio.
# Runs M + burnin iterations. For iterations 26 to burnin - 1 the proposal
# standard deviations are reset to the column standard deviations of up to the
# 100 most recent draws; afterwards they stay fixed at their last value.
# Proposals move in the current direction `dir`, which is reversed after each
# rejection.
# Results:
#   beta_post_adaptguide2 : (M + burnin) x 2 matrix of draws (columns beta0, beta1)
#   accept                : 0/1 indicator of whether each proposal was accepted
#   rd_adaptguide2        : risk difference multiplied by 1000 at each draw

# initialize
M <- 10000
burnin <- 1000
set.seed(91828)
beta_post_adaptguide2 <- matrix(nrow = M + burnin, ncol = 2)
colnames(beta_post_adaptguide2) <- c("beta0", "beta1")
accept <- numeric(M + burnin)
rd_adaptguide2 <- numeric(M + burnin)
beta_post_adaptguide2[1, ] <- c(2, -3)
# Initial risk difference: computed from this chain's own starting value
# (not from another chain's matrix) and on the same x1000 scale as the loop.
rd_adaptguide2[1] <- 1000 * riskdifference(y, x, beta_post_adaptguide2[1, ])
accept[1] <- 1
prop.sigma <- c(0.2, 0.2)
dir <- 1
for (i in 2:(M + burnin)) {
  # scalar condition, so use the short-circuit operator
  if (i > 25 && i < burnin) {
    # adapt the proposal scale from the most recent draws (window of up to 100)
    prop.sigma <- apply(beta_post_adaptguide2[max(1, i - 100):(i - 1), ], 2, sd)
  }
  oldb <- beta_post_adaptguide2[i - 1, ]
  prop <- dir * abs(rnorm(2, sd = prop.sigma))
  newb <- oldb + prop
  # log likelihood plus log prior at the proposed and at the current value
  num <- loglik(y, x, newb) +
    dnorm(newb[1], mean = 0, sd = sqrt(100), log = TRUE) +
    dnorm(newb[2], mean = 0, sd = sqrt(0.5), log = TRUE)
  den <- loglik(y, x, oldb) +
    dnorm(oldb[1], mean = 0, sd = sqrt(100), log = TRUE) +
    dnorm(oldb[2], mean = 0, sd = sqrt(0.5), log = TRUE)
  acceptprob <- exp(num - den)
  acc <- (acceptprob > runif(1))
  if (acc) {
    beta_post_adaptguide2[i, ] <- newb
    accept[i] <- 1
  } else {
    beta_post_adaptguide2[i, ] <- oldb
    accept[i] <- 0
    # rejection: reverse the direction of travel
    dir <- -dir
  }
  rd_adaptguide2[i] <- 1000 * riskdifference(y, x, beta_post_adaptguide2[i, ])
}
# Posterior means after discarding the burn-in iterations
postmean <- apply(beta_post_adaptguide2[-seq_len(burnin), ], 2, mean)
cat("Posterior mean, guided and adaptive\n")

## Posterior mean, guided and adaptive

round(postmean, 2)

## beta0 beta1 
## -1.75  0.54

Inspecting output

mean(accept)

## [1] 0.5552727

# Starting value and column means (first 1000 draws dropped) for the chain
# that used no prior term
init <- beta_post_adaptguide[1, ]
postmean <- apply(beta_post_adaptguide[-(1:1000), ], MARGIN = 2, FUN = mean)
cat("Posterior mean, uniform priors\n")

## Posterior mean, uniform priors

round(postmean, 2)

## beta0 beta1 
## -1.78  1.22

# Starting value and column means (first 1000 draws dropped) for the chain
# that used normal priors
init2 <- beta_post_adaptguide2[1, ]
postmean2 <- apply(beta_post_adaptguide2[-(1:1000), ], MARGIN = 2, FUN = mean)
cat("Posterior mean, informative normal priors\n")

## Posterior mean, informative normal priors

round(postmean2, 2)

## beta0 beta1 
## -1.75  0.54

# Side-by-side scatter plots of the draws from the two adaptive chains, each
# with its initial value and posterior mean highlighted
par(mfcol = c(1, 2))
plot(
  beta_post_adaptguide,
  pch = 19,
  col = rgb(0, 0, 0, 0.05),
  xlab = expression(beta[0]),
  ylab = expression(beta[1]),
  xlim = c(-2.5, 2.5),
  ylim = c(-4.5, 4.5),
  main = "Uniform priors"
)
points(init[1], init[2], col = "red", pch = 19)
points(postmean[1], postmean[2], col = "orange", pch = 19)
legend(
  "topright",
  col = c("red", "orange"),
  legend = c("Initial value", "Post. mean"),
  pch = 19
)

plot(
  beta_post_adaptguide2,
  pch = 19,
  col = rgb(0, 0, 0, 0.05),
  xlab = expression(beta[0]),
  ylab = expression(beta[1]),
  xlim = c(-2.5, 2.5),
  ylim = c(-4.5, 4.5),
  main = "Informative priors"
)
points(init2[1], init2[2], col = "red", pch = 19)
points(postmean2[1], postmean2[2], col = "orange", pch = 19)
legend(
  "topright",
  col = c("red", "orange"),
  legend = c("Initial value", "Post. mean"),
  pch = 19
)