Creating Voting Data

library(tidyverse)

Electoral vote allocations are based on the 2020 census: https://www.archives.gov/electoral-college/allocation

# Electoral votes per state, with three congressional districts (ME-1, ME-2,
# NE-2) carried as their own one-vote rows.
electoral_college <- read_csv("../data/electoral_college.csv") |>
  # Split the combined `state` text on " - " into a name and a vote field.
  separate_wider_delim(
    cols = state,
    delim = " - ",
    names = c("state", "evotes")
  ) |>
  mutate(
    # Pull the number out of the vote text.
    evotes = parse_number(evotes),
    # The statewide Maine and Nebraska rows are set to 2 and 4 because some of
    # their votes are listed as separate district rows below (presumably so
    # each state's overall total is unchanged -- confirm against the source).
    evotes = case_when(
      state == "Maine" ~ 2,
      state == "Nebraska" ~ 4,
      .default = evotes
    )
  ) |>
  # Append the district rows, one electoral vote each.
  bind_rows(
    tibble(
      state = c("ME-1", "ME-2", "NE-2"),
      evotes = c(1, 1, 1)
    )
  )

electoral_college
# A tibble: 54 × 2
   state                evotes
   <chr>                 <dbl>
 1 Alabama                   9
 2 Alaska                    3
 3 Arizona                  11
 4 Arkansas                  6
 5 California               54
 6 Colorado                 10
 7 Connecticut               7
 8 Delaware                  3
 9 District of Columbia      3
10 Florida                  30
# ℹ 44 more rows

Polling averages come from FiveThirtyEight: https://github.com/fivethirtyeight/data/blob/master/polls/2024-averages/presidential_general_averages_2024-09-12_uncorrected.csv

# Most recent polling average per state, one row per state, with a `Harris`
# and a `Trump` percentage column. The result is returned ungrouped so that
# later verbs operate on the whole table rather than state by state.
pres_polls <- read_csv("../data/presidential_general_averages.csv") |>
  # Drop the national average; only state-level rows are simulated.
  filter(state != "National") |>
  # Prefer the trend-adjusted figure and fall back to the plain estimate.
  mutate(pct = coalesce(pct_trend_adjusted, pct_estimate)) |>
  select(candidate, date, state, pct) |>
  # One column per candidate name found in the file.
  pivot_wider(names_from = candidate, values_from = pct) |>
  mutate(
    # The same candidate appears under a short and a long name; merge them.
    Trump = coalesce(Trump, `Donald Trump`),
    Biden = coalesce(Biden, `Joseph R. Biden Jr.`),
    # Where there is no Harris figure, use the Biden figure in its place.
    Harris = coalesce(Harris, Biden)
  ) |>
  select(date, state, Harris, Trump) |>
  mutate(date = lubridate::parse_date_time(date, orders = "mdy")) |>
  # Newest row first within each state, then keep only that row.
  arrange(state, desc(date)) |>
  group_by(state) |>
  slice_head(n = 1) |>
  ungroup()
# Combine each state's latest polling numbers with its electoral votes. The
# full join keeps states present in only one of the two tables; ungroup()
# drops any grouping carried over from `pres_polls`.
sim_data <- ungroup(
  full_join(pres_polls, electoral_college, by = "state")
)

sim_data
# A tibble: 54 × 5
   date                state                Harris Trump evotes
   <dttm>              <chr>                 <dbl> <dbl>  <dbl>
 1 2020-11-03 00:00:00 Alabama                37.8 57.4       9
 2 2020-11-03 00:00:00 Alaska                 43.6 51.2       3
 3 2024-09-17 00:00:00 Arizona                46.9 47.3      11
 4 2020-11-03 00:00:00 Arkansas               36.2 58.9       6
 5 2024-09-17 00:00:00 California             59.5 35.3      54
 6 2020-11-03 00:00:00 Colorado               53.6 41.1      10
 7 2020-11-03 00:00:00 Connecticut            58.6 32.4       7
 8 2020-11-03 00:00:00 Delaware               58.9 34.6       3
 9 2020-11-03 00:00:00 District of Columbia   90.8  5.78      3
10 2024-09-17 00:00:00 Florida                45.4 49.4      30
# ℹ 44 more rows
# Simulate one election.
#
# i:    identifier for this run; used to build the `simulation` label.
# data: data frame with `Harris`, `Trump` and `evotes` columns, one row per
#       state (or district).
#
# Returns one row per winning party with its summed electoral votes (`votes`)
# and the run label (`simulation`).
election <- function(i, data) {
  # One uniform draw, scaled by 100, for every row of `data`.
  draws <- 100 * runif(nrow(data))

  # A row goes to Harris when the draw is below her percentage, to Trump when
  # it is below the two percentages combined, and to "Third Party" otherwise
  # (including when a comparison is NA).
  outcomes <- cbind(data, rand_number = draws) |>
    mutate(
      state_winner = case_when(
        rand_number < Harris ~ "Harris",
        rand_number < Harris + Trump ~ "Trump",
        .default = "Third Party"
      )
    )

  # Total the electoral votes won by each party.
  tally <- outcomes |>
    group_by(state_winner) |>
    summarize(votes = sum(evotes))

  mutate(tally, simulation = paste0("simulation", i))
}

A candidate needs 270 electoral votes to win!

election(47, sim_data)
# A tibble: 3 × 3
  state_winner votes simulation  
  <chr>        <dbl> <chr>       
1 Harris         191 simulation47
2 Third Party     16 simulation47
3 Trump          331 simulation47
# Run 1,000 simulated elections and tabulate the outcome of each one.
map(1:1000, election, data = sim_data) |>
  list_rbind() |>
  group_by(simulation) |>
  # Keep exactly one row per simulation. By default slice_max() keeps every
  # row tied for the top total, which would count a tied simulation more than
  # once and push the tally above the number of simulations.
  slice_max(votes, n = 1, with_ties = FALSE) |>
  ungroup() |>
  # The leader only wins outright with at least 270 electoral votes.
  mutate(winner = case_when(
    votes >= 270 ~ state_winner,
    .default = "no winner"
  )) |>
  select(winner) |>
  table()
winner
   Harris no winner     Trump 
      463       265       275 

:::