A simple Bayesian bandit

“Bayesian bandit” is the common name given to the idea of using a Bayesian updating procedure to continuously exploit information collected from an operating environment. This is in contrast to a “one-shot” A/B test in which exploration and exploitation steps are sequential.

library(tidyverse)

## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ ggplot2 3.1.0       ✔ purrr   0.3.1  
## ✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.1.1       ✔ forcats 0.3.0

## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(patchwork)
library(ggthemes)
library(animation)
# brew install imagemagick
# check with: convert -v

set.seed(0)

knitr::opts_chunk$set(fig.width = 12, fig.height = 8, fig.retina = TRUE)

perform_bandit_round <- function(x) {
  x %>%
    mutate(draw = pmap_dbl(
      .l = list(oracle_conv_rates, x = outcome, y = round),
      function(x, y, ...) { rbeta(1, x + 1, y - x + 1) })) %>%
    filter(draw == max(draw)) %>%
    mutate(outcome = rbinom(1, 1, oracle_conv_rates),
           round = round + 1) %>%
    select(-draw) %>%
    bind_rows(x) %>%
    group_by(group, oracle_conv_rates, overall_round) %>%
    summarise(outcome = sum(outcome, na.rm = TRUE),
              round = max(round, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(obs_conv_rate = outcome / round,
           overall_round = overall_round + 1)
}

(tibble(oracle_conv_rates = c(0.08, 0.16, 0.32)) %>%
    mutate(group = factor(oracle_conv_rates),
           outcome = 0,
           round = 0) %>%
    mutate(overall_round = 0) %>%
    perform_bandit_round() -> x)

seq_len(3e2) %>%
  accumulate(function(x, i) {
    perform_bandit_round(x) %>%
      mutate(overall_round = i + 1)
  }, .init = x) %>%
  bind_rows() %>%
  ungroup() -> all_rounds

saveGIF({
  for(i in 1:length(unique(all_rounds$overall_round))) {
    all_rounds %>%
      filter(overall_round == !!i) %>%
      {
        (.) -> x
        beta_shapes <- function(i) {
          list(shape1 = 1 + x[i,]$outcome,
               shape2 = 1 + x[i,]$round - x[i,]$outcome)
        }
        p1 <- ggplot(data = tibble(x = c(0, 1)), aes(x = x)) +
          stat_function(fun = dbeta, args = beta_shapes(1), color = "#000000") +
          stat_function(fun = dbeta, args = beta_shapes(2), color = "#E69F00") +
          stat_function(fun = dbeta, args = beta_shapes(3), color = "#56B4E9") +
          labs(x = "Conversion rate", y = "Density") +
          ggtitle("Bayesian bandit")
        
        p2 <- x %>%
          ggplot(aes(x = factor(group), fill = factor(group))) +
          geom_bar(aes(y = round), stat = "identity", color = "black") +
          geom_text(aes(label = round, y = round*1.1)) +
          coord_flip() +
          scale_fill_colorblind(name = "Unknown conv rate") +
          labs(x = "Unknown conv rate") +
          theme(legend.text = element_text(size = 16),
                axis.text.y = element_text(size = 16))
        
        print(p1 + p2)
      }
  }
},
movie.name = "bayesian_bandit.gif",
interval = 0.1, nmax = 20,
ani.width = 600)

## Output at: bayesian_bandit.gif

## [1] TRUE

@statwonk