library(tidyverse)

# Creating Voting Data
# Electoral vote allocation based on the 2020 census:
# https://www.archives.gov/electoral-college/allocation
# Electoral votes per state.
#
# The CSV's `state` column is split on " - " into a state name and a vote
# count. Three single-vote district rows (ME-1, ME-2, NE-2) are appended, and
# the Maine and Nebraska rows are then set to 2 and 4 votes respectively
# (presumably so the district votes are not counted twice -- confirm).
electoral_college <-
  read_csv("../data/electoral_college.csv") |>
  separate_wider_delim(
    state,
    delim = " - ",
    names = c("state", "evotes")
  ) |>
  # parse_number() keeps only the numeric part of the vote text; an
  # alternative would be as.numeric(str_extract(evotes, "[0-9]+")).
  mutate(evotes = parse_number(evotes)) |>
  bind_rows(
    tibble(
      state = c("ME-1", "ME-2", "NE-2"),
      evotes = c(1, 1, 1)
    )
  ) |>
  mutate(
    evotes = case_when(
      state == "Maine" ~ 2,
      state == "Nebraska" ~ 4,
      TRUE ~ evotes
    )
  )
electoral_college
#> # A tibble: 54 × 2
#>    state                evotes
#>    <chr>                 <dbl>
#>  1 Alabama                   9
#>  2 Alaska                    3
#>  3 Arizona                  11
#>  4 Arkansas                  6
#>  5 California               54
#>  6 Colorado                 10
#>  7 Connecticut               7
#>  8 Delaware                  3
#>  9 District of Columbia      3
#> 10 Florida                  30
#> # ℹ 44 more rows
# Polling averages source (FiveThirtyEight data repository):
# https://github.com/fivethirtyeight/data/blob/master/polls/2024-averages/presidential_general_averages_2024-09-12_uncorrected.csv
# Most recent polling average per state, one row per state with a `Harris`
# and a `Trump` percentage column.
#
# Steps:
#   * drop the "National" rows;
#   * use the trend-adjusted percentage where present, otherwise the raw
#     estimate;
#   * spread candidates into columns and merge the alternative spellings
#     ("Donald Trump", "Joseph R. Biden Jr.") into `Trump` / `Biden`;
#   * where there is no Harris figure, fall back to the Biden figure;
#   * keep only the latest-dated row for each state.
pres_polls <- read_csv("../data/presidential_general_averages.csv") |>
  filter(state != "National") |>
  mutate(pct = coalesce(pct_trend_adjusted, pct_estimate)) |>
  select(candidate, date, state, pct) |>
  pivot_wider(names_from = candidate, values_from = pct) |>
  mutate(
    # Later expressions see the columns updated by earlier ones, so `Harris`
    # is filled from the already-merged `Biden` column.
    Trump = coalesce(Trump, `Donald Trump`),
    Biden = coalesce(Biden, `Joseph R. Biden Jr.`),
    Harris = coalesce(Harris, Biden)
  ) |>
  select(date, state, Harris, Trump) |>
  mutate(date = lubridate::parse_date_time(date, orders = "mdy")) |>
  arrange(state, desc(date)) |>
  group_by(state) |>
  slice_head(n = 1) |>
  # Drop the grouping so downstream code does not inherit it by accident.
  ungroup()

# Polls joined to electoral votes. full_join() keeps states that appear in
# only one of the two tables; their missing columns are NA.
sim_data <- pres_polls |>
  full_join(electoral_college, by = "state")
sim_data
#> # A tibble: 54 × 5
#>    date                state                Harris Trump evotes
#>    <dttm>              <chr>                 <dbl> <dbl>  <dbl>
#>  1 2020-11-03 00:00:00 Alabama                37.8 57.4       9
#>  2 2020-11-03 00:00:00 Alaska                 43.6 51.2       3
#>  3 2024-09-17 00:00:00 Arizona                46.9 47.3      11
#>  4 2020-11-03 00:00:00 Arkansas               36.2 58.9       6
#>  5 2024-09-17 00:00:00 California             59.5 35.3      54
#>  6 2020-11-03 00:00:00 Colorado               53.6 41.1      10
#>  7 2020-11-03 00:00:00 Connecticut            58.6 32.4       7
#>  8 2020-11-03 00:00:00 Delaware               58.9 34.6       3
#>  9 2020-11-03 00:00:00 District of Columbia   90.8  5.78      3
#> 10 2024-09-17 00:00:00 Florida                45.4 49.4      30
#> # ℹ 44 more rows
# Simulate one election and total the electoral votes won by each side.
#
# Each row of `data` gets one uniform random draw between 0 and 100. The row
# goes to "Harris" if the draw is below the `Harris` percentage, to "Trump" if
# it is below `Harris + Trump`, and to "Third Party" otherwise.
#
# Arguments:
#   i     Simulation identifier; pasted after "simulation" to label the result.
#   data  Data frame with numeric columns `Harris`, `Trump` and `evotes`,
#         one row per state or district.
#
# Returns a tibble with one row per winning side that took at least one row:
# `state_winner`, `votes` (sum of `evotes`) and `simulation`.
#
# NOTE(review): rows whose `Harris` or `Trump` value is NA match neither of
# the first two conditions and are therefore assigned to "Third Party".
# Confirm that is intended for rows without polling data.
election <- function(i, data) {
  data |>
    # Draw across the whole table, not within any inherited groups.
    ungroup() |>
    mutate(
      rand_number = runif(n()) * 100,
      state_winner = case_when(
        rand_number < Harris ~ "Harris",
        rand_number < Harris + Trump ~ "Trump",
        TRUE ~ "Third Party"
      )
    ) |>
    group_by(state_winner) |>
    summarize(votes = sum(evotes)) |>
    mutate(simulation = paste0("simulation", i))
}

# Need 270 to win!
election(47, sim_data)
#> # A tibble: 3 × 3
#>   state_winner votes simulation
#>   <chr>        <dbl> <chr>
#> 1 Harris         191 simulation47
#> 2 Third Party     16 simulation47
#> 3 Trump          331 simulation47
# Run 1000 simulated elections and tabulate who wins each one.
#
# For every simulation the side with the most electoral votes is kept and is
# declared the winner only if it reaches 270 votes; otherwise the simulation
# is recorded as "no winner".
#
# slice_max() keeps all tied rows by default, which would count a simulation
# with a tie for first place once per tied side. `n = 1, with_ties = FALSE`
# guarantees exactly one row per simulation, so the counts sum to the number
# of simulations.
map(1:1000, election, data = sim_data) |>
  list_rbind() |>
  group_by(simulation) |>
  slice_max(votes, n = 1, with_ties = FALSE) |>
  ungroup() |>
  mutate(
    winner = case_when(
      votes >= 270 ~ state_winner,
      TRUE ~ "no winner"
    )
  ) |>
  select(winner) |>
  table()

# Output recorded from a run made before the tie fix above. The counts total
# 1003 for 1000 simulations because tied simulations were counted more than
# once:
#> winner
#>    Harris no winner     Trump
#>       463       265       275
:::