September 9 + 11, 2024
Much of this material can be found at the introduction to dplyr vignette.
The Active Duty data are not tidy! What are the cases? How are the data not tidy? What might the data look like in tidy form? Suppose that the case was “an individual in the armed forces.” What variables would you use to capture the information in the following table?
image credit: https://www.tidyverse.org/.
Hosted online:
Hosted locally:
Things to note:
View() can be used in RStudio to bring up an Excel-style spreadsheet. Only for viewing, not editing!
View() has a capital letter V.
View() should not be used in the Quarto document.
dim() is used to find the dimensions (rows x columns).
names() is used to find the names of the variables.
head() is used to print the first several lines of the dataset to the console.
[1] 134 5
[1] "...1" "score2" "rating2" "genre2" "box office2"
# A tibble: 3 × 5
...1 score2 rating2 genre2 `box office2`
<chr> <dbl> <chr> <chr> <dbl>
1 2 Fast 2 Furious 48.9 PG-13 action 127.
2 28 Days Later 78.2 R horror 45.1
3 A Guy Thing 39.5 PG-13 rom comedy 15.5
For now, we’ll work with all flights out of the three NYC airports in 2013.
Whenever you’re learning a new tool, for a long time you’re going to suck … but the good news is that is typical, that’s something that happens to everyone, and it’s only temporary.
-Hadley Wickham
Data sets are often of high volume (lots of rows) and high variety (lots of columns). This is overwhelming to visualize and analyze, so we find ourselves chopping the data set up into more manageable and meaningful chunks. We also often need to perform operations to organize and clean our data.
This is all possible in base R, but with dplyr, it is simple, readable, and fast.
Most data wrangling happens with a set of data verbs. Verbs are functions that act on dataframes.
The first argument of each data verb is the dataframe.
filter()
arrange()
select()
distinct()
mutate()
summarize()
sample_n()
filter()
Allows you to select a subset of the rows of a data frame. The first argument is the name of the data frame; the following arguments are the filters that you'd like to apply.
For all flights on January 1st:
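The output below is consistent with a call along these lines (a sketch, assuming the `flights` data frame from the nycflights13 package is loaded):

```r
library(nycflights13)
library(dplyr)

# All flights departing on January 1st
filter(flights, month == 1, day == 1)
```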
# A tibble: 842 × 19
year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
<int> <int> <int> <int> <int> <dbl> <int> <int>
1 2013 1 1 517 515 2 830 819
2 2013 1 1 533 529 4 850 830
3 2013 1 1 542 540 2 923 850
4 2013 1 1 544 545 -1 1004 1022
5 2013 1 1 554 600 -6 812 837
6 2013 1 1 554 558 -4 740 728
7 2013 1 1 555 600 -5 913 854
8 2013 1 1 557 600 -3 709 723
9 2013 1 1 557 600 -3 838 846
10 2013 1 1 558 600 -2 753 745
# ℹ 832 more rows
# ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
# tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
# hour <dbl>, minute <dbl>, time_hour <dttm>
Filters are constructed of logical operators: <, >, <=, >=, ==, != (and some others).
Adding them one by one to filter() is akin to saying "this AND that". To say "this OR that OR both", use |.
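For example, the 51,955-row output below matches flights in January or February (a sketch, assuming `flights` is loaded):

```r
# Flights departing in January OR February (or both -- | allows either)
filter(flights, month == 1 | month == 2)
```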
# A tibble: 51,955 × 19
year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
<int> <int> <int> <int> <int> <dbl> <int> <int>
1 2013 1 1 517 515 2 830 819
2 2013 1 1 533 529 4 850 830
3 2013 1 1 542 540 2 923 850
4 2013 1 1 544 545 -1 1004 1022
5 2013 1 1 554 600 -6 812 837
6 2013 1 1 554 558 -4 740 728
7 2013 1 1 555 600 -5 913 854
8 2013 1 1 557 600 -3 709 723
9 2013 1 1 557 600 -3 838 846
10 2013 1 1 558 600 -2 753 745
# ℹ 51,945 more rows
# ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
# tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
# hour <dbl>, minute <dbl>, time_hour <dttm>
Construct filters to isolate:
arrange()
arrange() reorders the rows: it takes a data frame and a set of column names (or more complicated expressions) to order by. If you provide more than one column name, each additional column will be used to break ties in the values of preceding columns.
Use desc() to sort in descending order.
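A sketch of typical calls (assuming `flights` is loaded):

```r
# Sort by year, then use month and day to break ties
arrange(flights, year, month, day)

# Longest departure delays first
arrange(flights, desc(dep_delay))
```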
select()
Often you work with large datasets with many columns where only a few are actually of interest to you. select() allows you to rapidly zoom in on a useful subset using operations that usually only work on numeric variable positions:
You can exclude columns using - and specify a range using :.
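A sketch of each pattern (assuming `flights` is loaded):

```r
# Keep three columns by name
select(flights, year, month, day)

# Select a range of columns with :
select(flights, year:day)

# Exclude a range of columns with -
select(flights, -(year:day))
```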
distinct()
A common use of select() is to find out which values a set of variables takes. This is particularly useful in conjunction with the distinct() verb, which returns only the unique rows in a table.
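For example (a sketch, assuming `flights` is loaded):

```r
# Unique origin-destination pairs flown in 2013
distinct(select(flights, origin, dest))
```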
What do the following data correspond to?
mutate()
As well as selecting from the set of existing columns, it's often useful to add new columns that are functions of existing columns. This is the job of mutate():
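The gain column in the output below equals dep_delay - arr_delay, so a call along these lines would produce it (assuming `flights` is loaded):

```r
# Minutes made up in the air: gain = dep_delay - arr_delay
mutate(select(flights, flight, dep_delay, arr_delay),
       gain = dep_delay - arr_delay)
```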
# A tibble: 336,776 × 4
flight dep_delay arr_delay gain
<int> <dbl> <dbl> <dbl>
1 1545 2 11 -9
2 1714 4 20 -16
3 1141 2 33 -31
4 725 -1 -18 17
5 461 -6 -25 19
6 1696 -4 12 -16
7 507 -5 19 -24
8 5708 -3 -14 11
9 79 -3 -8 5
10 301 -2 8 -10
# ℹ 336,766 more rows
summarize()
and sample_n()
summarize() collapses a data frame to a single row based on some function. It's not very useful yet, but it will be.
sample_n() provides you with a random sample of rows.
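The two outputs below are consistent with calls along these lines (assuming `flights` is loaded):

```r
# Mean departure delay across all flights (collapses to one row)
summarize(flights, delay = mean(dep_delay, na.rm = TRUE))

# Ten randomly sampled rows
sample_n(flights, 10)
```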
# A tibble: 1 × 1
delay
<dbl>
1 12.6
# A tibble: 10 × 19
year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
<int> <int> <int> <int> <int> <dbl> <int> <int>
1 2013 2 4 2306 2250 16 2357 2353
2 2013 11 12 1221 1215 6 1409 1445
3 2013 9 21 939 940 -1 1211 1235
4 2013 11 19 727 730 -3 830 844
5 2013 8 15 651 655 -4 942 930
6 2013 7 8 1015 1025 -10 1204 1222
7 2013 7 30 1159 1200 -1 1318 1310
8 2013 7 11 1743 1735 8 2018 2030
9 2013 5 20 2114 2117 -3 2335 2351
10 2013 2 24 1443 1445 -2 1652 1710
# ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
# tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
# hour <dbl>, minute <dbl>, time_hour <dttm>
Mutate the data to create a new column that contains the average speed traveled by the plane for each flight.
Select the new variable and save it, along with tailnum, as a new data frame object.
group_by()
summarize() and sample_n() are even more powerful when combined with the idea of "group by", repeating the operation separately on groups of observations within the dataset.
The group_by() function describes how to break a dataset down into groups of rows.
group_by()
Find the fastest airplanes in the bunch, measured as the average speed per airplane.
by_tailnum <- group_by(speed_data, tailnum)
avg_speed <- summarize(by_tailnum,
count = n(),
avg_speed = mean(speed, na.rm = TRUE))
arrange(avg_speed, desc(avg_speed))
# A tibble: 4,044 × 3
tailnum count avg_speed
<chr> <int> <dbl>
1 N228UA 1 501.
2 N315AS 1 499.
3 N654UA 1 499.
4 N819AW 1 490.
5 N382HA 26 486.
6 N388HA 36 484.
7 N391HA 21 484.
8 N777UA 1 483.
9 N385HA 28 483.
10 N392HA 13 482.
# ℹ 4,034 more rows
Instead of applying each verb step-by-step, we can chain them into a single data pipeline, connected with the |> operator. You start the pipeline with a data frame and then pass it to each function in turn.
The pipe syntax (|>) takes a data frame and sends it to the argument of a function. The mapping goes to the first available argument in the function. For example:
x |> f() is the same as f(x)
x |> f(y) is the same as f(x, y)
(better??)
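A minimal base-R illustration of these equivalences:

```r
# x |> f() is f(x); x |> f(y) is f(x, y)
c(1, 4, 9) |> sqrt()           # same as sqrt(c(1, 4, 9))
c(1, 4, 9) |> sqrt() |> sum()  # same as sum(sqrt(c(1, 4, 9)))
```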
From Hadley Wickham, how to think about tidy data.
Little bunny Foo Foo
Went hopping through the forest
Scooping up the field mice
And bopping them on the head
The nursery rhyme could be created by a series of steps where the output from each step is saved as an object along the way.
Another approach is to concatenate the functions so that there is only one output.
Or even worse, as one line:
Instead, the code can be written using the pipe in the order in which the function is evaluated:
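With hypothetical verbs hop(), scoop(), and bop() standing in for the rhyme's actions (these functions don't exist; they just illustrate the styles), the three approaches might look like:

```r
# Step-by-step: each result saved as an object
foo_foo_1 <- hop(foo_foo, through = forest)
foo_foo_2 <- scoop(foo_foo_1, up = field_mice)
foo_foo_3 <- bop(foo_foo_2, on = head)

# Concatenated: reads inside-out
bop(scoop(hop(foo_foo, through = forest), up = field_mice), on = head)

# Piped: reads in the order of evaluation
foo_foo |>
  hop(through = forest) |>
  scoop(up = field_mice) |>
  bop(on = head)
```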
flights2 <- mutate(flights, speed = distance/(air_time/60))
tail_speed <- select(flights2, tailnum, speed)
tail_speed_grp <- group_by(tail_speed, tailnum)
tail_ave <- summarize(tail_speed_grp, number = n(),
avg_speed = mean(speed, na.rm = TRUE))
arrange(tail_ave, desc(avg_speed))
# A tibble: 4,044 × 3
tailnum number avg_speed
<chr> <int> <dbl>
1 N228UA 1 501.
2 N315AS 1 499.
3 N654UA 1 499.
4 N819AW 1 490.
5 N382HA 26 486.
6 N388HA 36 484.
7 N391HA 21 484.
8 N777UA 1 483.
9 N385HA 28 483.
10 N392HA 13 482.
# ℹ 4,034 more rows
flights |>
mutate(speed = distance / (air_time/60)) |>
select(tailnum, speed) |>
group_by(tailnum) |>
summarize(number = n(),
avg_speed = mean(speed, na.rm = TRUE)) |>
arrange(desc(avg_speed))
# A tibble: 4,044 × 3
tailnum number avg_speed
<chr> <int> <dbl>
1 N228UA 1 501.
2 N315AS 1 499.
3 N654UA 1 499.
4 N819AW 1 490.
5 N382HA 26 486.
6 N388HA 36 484.
7 N391HA 21 484.
8 N777UA 1 483.
9 N385HA 28 483.
10 N392HA 13 482.
# ℹ 4,034 more rows
Form a chain that creates a data frame containing only carrier and each carrier's mean departure delay time. Which carriers have the highest and lowest mean delays?
flights |>
group_by(carrier) |>
summarize(avg_delay = mean(dep_delay, na.rm = TRUE)) |>
arrange(desc(avg_delay))
# A tibble: 16 × 2
carrier avg_delay
<chr> <dbl>
1 F9 20.2
2 EV 20.0
3 YV 19.0
4 FL 18.7
5 WN 17.7
6 9E 16.7
7 B6 13.0
8 VX 12.9
9 OO 12.6
10 UA 12.1
11 MQ 10.6
12 DL 9.26
13 AA 8.59
14 AS 5.80
15 HA 4.90
16 US 3.78
Say you’re curious about the relationship between the number of flights that each plane made in 2013, the mean distance that each of those planes flew, and the mean arrival delay. You also want to exclude the edge cases from your analysis, so focus on the planes that have logged more than 20 flights and flown an average distance of less than 2000 miles. Please form the chain that creates this dataset.
pivoting
image credit: https://www.garrickadenbuie.com/project/tidyexplain/
pivot_longer() makes the dataframe "longer" – many columns into a few columns (more rows): pivot_longer(data, cols, names_to = , values_to = )
pivot_wider() makes the dataframe "wider" – a few columns into many columns (fewer rows): pivot_wider(data, names_from = , values_from = )
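A toy sketch of both verbs (made-up data, not from the lecture):

```r
library(tidyr)

# Hypothetical wide data: one column per year
scores <- tibble::tibble(student = c("A", "B"),
                         `2023` = c(85, 90),
                         `2024` = c(88, 95))

# Wide -> long: the year columns become rows
long <- pivot_longer(scores, cols = -student,
                     names_to = "year", values_to = "score")

# Long -> wide: undoes the reshaping
wide <- pivot_wider(long, names_from = year, values_from = score)
```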
pivot_longer
pivot_longer will be demonstrated using datasets from Gapminder.
litF represents country, year, and female literacy rate.
library(googlesheets4)
gs4_deauth()
litF <- read_sheet("https://docs.google.com/spreadsheets/d/1hDinTIRHQIaZg1RUn6Z_6mo12PtKwEPFIz_mJVF6P5I/pub?gid=0")
litF
# A tibble: 260 × 38
Adult (15+) literacy rate …¹ `1975` `1976` `1977` `1978` `1979` `1980` `1981`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Afghanistan NA NA NA NA 4.99 NA NA
2 Albania NA NA NA NA NA NA NA
3 Algeria NA NA NA NA NA NA NA
4 Andorra NA NA NA NA NA NA NA
5 Angola NA NA NA NA NA NA NA
6 Anguilla NA NA NA NA NA NA NA
7 Antigua and Barbuda NA NA NA NA NA NA NA
8 Argentina NA NA NA NA NA 93.6 NA
9 Armenia NA NA NA NA NA NA NA
10 Aruba NA NA NA NA NA NA NA
# ℹ 250 more rows
# ℹ abbreviated name: ¹`Adult (15+) literacy rate (%). Female`
# ℹ 30 more variables: `1982` <dbl>, `1983` <dbl>, `1984` <dbl>, `1985` <dbl>,
# `1986` <dbl>, `1987` <dbl>, `1988` <dbl>, `1989` <dbl>, `1990` <dbl>,
# `1991` <dbl>, `1992` <dbl>, `1993` <dbl>, `1994` <dbl>, `1995` <dbl>,
# `1996` <dbl>, `1997` <dbl>, `1998` <dbl>, `1999` <dbl>, `2000` <dbl>,
# `2001` <dbl>, `2002` <dbl>, `2003` <dbl>, `2004` <dbl>, `2005` <dbl>, …
pivot_longer
litF <- litF |>
select(country = starts_with("Adult"), everything()) |>
pivot_longer(cols = -country,
names_to = "year",
values_to = "litRateF") |>
filter(!is.na(litRateF))
litF
# A tibble: 571 × 3
country year litRateF
<chr> <chr> <dbl>
1 Afghanistan 1979 4.99
2 Afghanistan 2011 13
3 Albania 2001 98.3
4 Albania 2008 94.7
5 Albania 2011 95.7
6 Algeria 1987 35.8
7 Algeria 2002 60.1
8 Algeria 2006 63.9
9 Angola 2001 54.2
10 Angola 2011 58.6
# ℹ 561 more rows
pivot_longer
GDP represents country, year, and gdp (in fixed 2000 US$).
GDP <- read_sheet("https://docs.google.com/spreadsheets/d/1RctTQmKB0hzbm1E8rGcufYdMshRdhmYdeL29nXqmvsc/pub?gid=0")
GDP <- GDP |>
select(country = starts_with("Income"), everything()) |>
pivot_longer(cols = -country,
names_to = "year",
values_to = "gdp") |>
filter(!is.na(gdp))
GDP
# A tibble: 7,988 × 3
country year gdp
<chr> <chr> <dbl>
1 Albania 1980 1061.
2 Albania 1981 1100.
3 Albania 1982 1111.
4 Albania 1983 1101.
5 Albania 1984 1065.
6 Albania 1985 1060.
7 Albania 1986 1092.
8 Albania 1987 1054.
9 Albania 1988 1014.
10 Albania 1989 1092.
# ℹ 7,978 more rows
pivot_wider
pivot_wider will be demonstrated using the babynames dataset.
# A tibble: 1,924,665 × 5
year sex name n prop
<dbl> <chr> <chr> <int> <dbl>
1 1880 F Mary 7065 0.0724
2 1880 F Anna 2604 0.0267
3 1880 F Emma 2003 0.0205
4 1880 F Elizabeth 1939 0.0199
5 1880 F Minnie 1746 0.0179
6 1880 F Margaret 1578 0.0162
7 1880 F Ida 1472 0.0151
8 1880 F Alice 1414 0.0145
9 1880 F Bertha 1320 0.0135
10 1880 F Sarah 1288 0.0132
# ℹ 1,924,655 more rows
pivot_wider
babynames
# A tibble: 1,756,284 × 4
year name F M
<dbl> <chr> <int> <int>
1 1880 Mary 7065 27
2 1880 Anna 2604 12
3 1880 Emma 2003 10
4 1880 Elizabeth 1939 9
5 1880 Minnie 1746 9
6 1880 Margaret 1578 NA
7 1880 Ida 1472 8
8 1880 Alice 1414 NA
9 1880 Bertha 1320 NA
10 1880 Sarah 1288 NA
# ℹ 1,756,274 more rows
pivot_wider
babynames
babynames |>
select(-prop) |>
pivot_wider(names_from = sex, values_from = n) |>
filter(!is.na(F), !is.na(M)) |>
arrange(desc(year), desc(F))
# A tibble: 168,381 × 4
year name F M
<dbl> <chr> <int> <int>
1 2017 Emma 19738 14
2 2017 Olivia 18632 10
3 2017 Ava 15902 12
4 2017 Isabella 15100 12
5 2017 Sophia 14831 17
6 2017 Mia 13437 16
7 2017 Charlotte 12893 7
8 2017 Amelia 11800 11
9 2017 Evelyn 10675 11
10 2017 Abigail 10551 6
# ℹ 168,371 more rows
pivot_wider
babynames
babynames |>
pivot_wider(names_from = sex, values_from = n) |>
mutate(maxcount = pmax(F, M, na.rm = TRUE)) |>
arrange(desc(maxcount))
# A tibble: 1,924,653 × 6
year name prop F M maxcount
<dbl> <chr> <dbl> <int> <int> <int>
1 1947 Linda 0.0548 99686 NA 99686
2 1948 Linda 0.0552 96209 NA 96209
3 1947 James 0.0510 NA 94756 94756
4 1957 Michael 0.0424 NA 92695 92695
5 1947 Robert 0.0493 NA 91642 91642
6 1949 Linda 0.0518 91016 NA 91016
7 1956 Michael 0.0423 NA 90620 90620
8 1958 Michael 0.0420 NA 90520 90520
9 1948 James 0.0497 NA 88588 88588
10 1954 Michael 0.0428 NA 88514 88514
# ℹ 1,924,643 more rows
See the Posit cheatsheets on wrangling & joining and pivoting.
left_join returns all rows from the left table, and any rows with matching keys from the right table.
inner_join returns only the rows in which the left table has matching keys in the right table (i.e., matching rows in both sets).
full_join returns all rows from both tables, matching records where the keys agree and filling in NA otherwise.
Good practice: always specify the by argument when joining data frames.
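A toy sketch of the three joins (made-up tables, mirroring dplyr's band_members example):

```r
library(dplyr)

band <- tibble(name = c("Mick", "John", "Paul"),
               band = c("Stones", "Beatles", "Beatles"))
instrument <- tibble(name = c("John", "Paul", "Keith"),
                     plays = c("guitar", "bass", "guitar"))

left_join(band, instrument, by = "name")   # 3 rows; Mick's plays is NA
inner_join(band, instrument, by = "name")  # 2 rows: John and Paul
full_join(band, instrument, by = "name")   # 4 rows; NAs fill the gaps
```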
10 women in science who changed the world (source: Discover Magazine)1
| name | profession |
|---|---|
| Ada Lovelace | Mathematician |
| Marie Curie | Physicist and Chemist |
| Janaki Ammal | Botanist |
| Chien-Shiung Wu | Physicist |
| Katherine Johnson | Mathematician |
| Rosalind Franklin | Chemist |
| Vera Rubin | Astronomer |
| Gladys West | Mathematician |
| Flossie Wong-Staal | Virologist and Molecular Biologist |
| Jennifer Doudna | Biochemist |
# A tibble: 10 × 2
name profession
<chr> <chr>
1 Ada Lovelace Mathematician
2 Marie Curie Physicist and Chemist
3 Janaki Ammal Botanist
4 Chien-Shiung Wu Physicist
5 Katherine Johnson Mathematician
6 Rosalind Franklin Chemist
7 Vera Rubin Astronomer
8 Gladys West Mathematician
9 Flossie Wong-Staal Virologist and Molecular Biologist
10 Jennifer Doudna Biochemist
# A tibble: 9 × 2
name known_for
<chr> <chr>
1 Ada Lovelace first computer algorithm
2 Marie Curie theory of radioactivity, discovery of elements polonium a…
3 Janaki Ammal hybrid species, biodiversity protection
4 Chien-Shiung Wu confim and refine theory of radioactive beta decy, Wu expe…
5 Katherine Johnson calculations of orbital mechanics critical to sending the …
6 Vera Rubin existence of dark matter
7 Gladys West mathematical modeling of the shape of the Earth which serv…
8 Flossie Wong-Staal first scientist to clone HIV and create a map of its genes…
9 Jennifer Doudna one of the primary developers of CRISPR, a ground-breaking…
We’d like to put together the data to look like:
# A tibble: 10 × 5
name profession birth_year death_year known_for
<chr> <chr> <dbl> <dbl> <chr>
1 Ada Lovelace Mathematician NA NA first co…
2 Marie Curie Physicist and Chemist NA NA theory o…
3 Janaki Ammal Botanist 1897 1984 hybrid s…
4 Chien-Shiung Wu Physicist 1912 1997 confim a…
5 Katherine Johnson Mathematician 1918 2020 calculat…
6 Rosalind Franklin Chemist 1920 1958 <NA>
7 Vera Rubin Astronomer 1928 2016 existenc…
8 Gladys West Mathematician 1930 NA mathemat…
9 Flossie Wong-Staal Virologist and Molecular … 1947 2020 first sc…
10 Jennifer Doudna Biochemist 1964 NA one of t…
For the next few slides…
left_join()
# A tibble: 10 × 4
name profession birth_year death_year
<chr> <chr> <dbl> <dbl>
1 Ada Lovelace Mathematician NA NA
2 Marie Curie Physicist and Chemist NA NA
3 Janaki Ammal Botanist 1897 1984
4 Chien-Shiung Wu Physicist 1912 1997
5 Katherine Johnson Mathematician 1918 2020
6 Rosalind Franklin Chemist 1920 1958
7 Vera Rubin Astronomer 1928 2016
8 Gladys West Mathematician 1930 NA
9 Flossie Wong-Staal Virologist and Molecular Biologist 1947 2020
10 Jennifer Doudna Biochemist 1964 NA
right_join()
# A tibble: 8 × 4
name profession birth_year death_year
<chr> <chr> <dbl> <dbl>
1 Janaki Ammal Botanist 1897 1984
2 Chien-Shiung Wu Physicist 1912 1997
3 Katherine Johnson Mathematician 1918 2020
4 Rosalind Franklin Chemist 1920 1958
5 Vera Rubin Astronomer 1928 2016
6 Gladys West Mathematician 1930 NA
7 Flossie Wong-Staal Virologist and Molecular Biologist 1947 2020
8 Jennifer Doudna Biochemist 1964 NA
full_join()
# A tibble: 10 × 4
name birth_year death_year known_for
<chr> <dbl> <dbl> <chr>
1 Janaki Ammal 1897 1984 hybrid species, biodiversity protec…
2 Chien-Shiung Wu 1912 1997 confim and refine theory of radioac…
3 Katherine Johnson 1918 2020 calculations of orbital mechanics c…
4 Rosalind Franklin 1920 1958 <NA>
5 Vera Rubin 1928 2016 existence of dark matter
6 Gladys West 1930 NA mathematical modeling of the shape …
7 Flossie Wong-Staal 1947 2020 first scientist to clone HIV and cr…
8 Jennifer Doudna 1964 NA one of the primary developers of CR…
9 Ada Lovelace NA NA first computer algorithm
10 Marie Curie NA NA theory of radioactivity, discovery…
inner_join()
# A tibble: 7 × 4
name birth_year death_year known_for
<chr> <dbl> <dbl> <chr>
1 Janaki Ammal 1897 1984 hybrid species, biodiversity protect…
2 Chien-Shiung Wu 1912 1997 confim and refine theory of radioact…
3 Katherine Johnson 1918 2020 calculations of orbital mechanics cr…
4 Vera Rubin 1928 2016 existence of dark matter
5 Gladys West 1930 NA mathematical modeling of the shape o…
6 Flossie Wong-Staal 1947 2020 first scientist to clone HIV and cre…
7 Jennifer Doudna 1964 NA one of the primary developers of CRI…
semi_join()
anti_join()
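semi_join() keeps the left-table rows that have a match in the right table (without adding its columns); anti_join() keeps the left-table rows that do not. A toy sketch with made-up tables:

```r
library(dplyr)

band <- tibble(name = c("Mick", "John", "Paul"),
               band = c("Stones", "Beatles", "Beatles"))
instrument <- tibble(name = c("John", "Paul", "Keith"),
                     plays = c("guitar", "bass", "guitar"))

semi_join(band, instrument, by = "name")  # John and Paul: they have a match
anti_join(band, instrument, by = "name")  # Mick: no match in instrument
```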
# A tibble: 10 × 5
name profession birth_year death_year known_for
<chr> <chr> <dbl> <dbl> <chr>
1 Ada Lovelace Mathematician NA NA first co…
2 Marie Curie Physicist and Chemist NA NA theory o…
3 Janaki Ammal Botanist 1897 1984 hybrid s…
4 Chien-Shiung Wu Physicist 1912 1997 confim a…
5 Katherine Johnson Mathematician 1918 2020 calculat…
6 Rosalind Franklin Chemist 1920 1958 <NA>
7 Vera Rubin Astronomer 1928 2016 existenc…
8 Gladys West Mathematician 1930 NA mathemat…
9 Flossie Wong-Staal Virologist and Molecular … 1947 2020 first sc…
10 Jennifer Doudna Biochemist 1964 NA one of t…
litF and GDP from Gapminder.
[1] 571 4
# A tibble: 571 × 4
country year litRateF gdp
<chr> <chr> <dbl> <dbl>
1 Afghanistan 1979 4.99 NA
2 Afghanistan 2011 13 NA
3 Albania 2001 98.3 1282.
4 Albania 2008 94.7 1804.
5 Albania 2011 95.7 1966.
6 Algeria 1987 35.8 1902.
7 Algeria 2002 60.1 1872.
8 Algeria 2006 63.9 2125.
9 Angola 2001 54.2 298.
10 Angola 2011 58.6 630.
# ℹ 561 more rows
[1] 7988 4
# A tibble: 7,988 × 4
country year litRateF gdp
<chr> <chr> <dbl> <dbl>
1 Albania 2001 98.3 1282.
2 Albania 2008 94.7 1804.
3 Albania 2011 95.7 1966.
4 Algeria 1987 35.8 1902.
5 Algeria 2002 60.1 1872.
6 Algeria 2006 63.9 2125.
7 Angola 2001 54.2 298.
8 Angola 2011 58.6 630.
9 Antigua and Barbuda 2001 99.4 9640.
10 Antigua and Barbuda 2011 99.4 9978.
# ℹ 7,978 more rows
[1] 505 4
# A tibble: 505 × 4
country year litRateF gdp
<chr> <chr> <dbl> <dbl>
1 Albania 2001 98.3 1282.
2 Albania 2008 94.7 1804.
3 Albania 2011 95.7 1966.
4 Algeria 1987 35.8 1902.
5 Algeria 2002 60.1 1872.
6 Algeria 2006 63.9 2125.
7 Angola 2001 54.2 298.
8 Angola 2011 58.6 630.
9 Antigua and Barbuda 2001 99.4 9640.
10 Antigua and Barbuda 2011 99.4 9978.
# ℹ 495 more rows
[1] 8054 4
# A tibble: 8,054 × 4
country year litRateF gdp
<chr> <chr> <dbl> <dbl>
1 Afghanistan 1979 4.99 NA
2 Afghanistan 2011 13 NA
3 Albania 2001 98.3 1282.
4 Albania 2008 94.7 1804.
5 Albania 2011 95.7 1966.
6 Algeria 1987 35.8 1902.
7 Algeria 2002 60.1 1872.
8 Algeria 2006 63.9 2125.
9 Angola 2001 54.2 298.
10 Angola 2011 58.6 630.
# ℹ 8,044 more rows
join to merge two datasets
If you ever need to understand which join is the right join for you, try to find an image that lays out what the function is doing. I found one that is quite good, taken from the Statistics Globe blog.
lubridate is another R package meant for data wrangling!
In particular, lubridate makes it very easy to work with days, times, and dates. The basic idea is to start with dates in a ymd (year month day) format and transform the information into whatever you want.
Example from the lubridate vignette.
The lengths of months and years change so often that doing arithmetic with them can be unintuitive. Consider a simple operation: January 31st + one month.
Should the answer be:
A basic property of arithmetic is that a + b - b = a. Only solution 1 obeys the mathematical property, but it is an invalid date. Wickham wants to make lubridate as consistent as possible by invoking the following rule: if adding or subtracting a month or a year creates an invalid date, lubridate will return an NA.
If you thought solution 2 or 3 was more useful, no problem. You can still get those results with clever arithmetic, or by using the special %m+% and %m-% operators. %m+% and %m-% automatically roll dates back to the last day of the month, should that be necessary.
lubridate
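The four output blocks below are consistent with calls along these lines (following the lubridate vignette):

```r
library(lubridate)

jan31 <- ymd("2021-01-31")

jan31 + months(0:11)                  # NA whenever the result is an invalid date
floor_date(jan31, "month") + months(0:11) + days(31)
jan31 + months(0:11) + days(31)
jan31 %m+% months(0:11)               # rolls back to the last day of the month
```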
[1] "2021-01-31" NA "2021-03-31" NA "2021-05-31"
[6] NA "2021-07-31" "2021-08-31" NA "2021-10-31"
[11] NA "2021-12-31"
[1] "2021-02-01" "2021-03-04" "2021-04-01" "2021-05-02" "2021-06-01"
[6] "2021-07-02" "2021-08-01" "2021-09-01" "2021-10-02" "2021-11-01"
[11] "2021-12-02" "2022-01-01"
[1] "2021-03-03" NA "2021-05-01" NA "2021-07-01"
[6] NA "2021-08-31" "2021-10-01" NA "2021-12-01"
[11] NA "2022-01-31"
[1] "2021-01-31" "2021-02-28" "2021-03-31" "2021-04-30" "2021-05-31"
[6] "2021-06-30" "2021-07-31" "2021-08-31" "2021-09-30" "2021-10-31"
[11] "2021-11-30" "2021-12-31"
flightsWK <- flights |>
mutate(ymdday = ymd(paste(year, month,day, sep="-"))) |>
mutate(weekdy = wday(ymdday, label=TRUE),
whichweek = week(ymdday))
flightsWK |> select(year, month, day, ymdday, weekdy, whichweek,
dep_time, arr_time, air_time)
# A tibble: 336,776 × 9
year month day ymdday weekdy whichweek dep_time arr_time air_time
<int> <int> <int> <date> <ord> <dbl> <int> <int> <dbl>
1 2013 1 1 2013-01-01 Tue 1 517 830 227
2 2013 1 1 2013-01-01 Tue 1 533 850 227
3 2013 1 1 2013-01-01 Tue 1 542 923 160
4 2013 1 1 2013-01-01 Tue 1 544 1004 183
5 2013 1 1 2013-01-01 Tue 1 554 812 116
6 2013 1 1 2013-01-01 Tue 1 554 740 150
7 2013 1 1 2013-01-01 Tue 1 555 913 158
8 2013 1 1 2013-01-01 Tue 1 557 709 53
9 2013 1 1 2013-01-01 Tue 1 557 838 140
10 2013 1 1 2013-01-01 Tue 1 558 753 138
# ℹ 336,766 more rows
reprex
Help me help you
reproducible example …
Step 1. Copy code onto the clipboard
Step 2. Type reprex() into the Console
Step 3. Look at the Viewer to the right. Copy the Viewer output into GitHub, Piazza, Discord, an email, stackexchange, etc.
reprex demo
reprex(
jan31 + months(0:11) + days(31)
)
multiple lines of code:
reprex({
jan31 <- ymd("2021-01-31")
jan31 + months(0:11) + days(31)
})
reprex({
library(lubridate)
jan31 <- ymd("2021-01-31")
jan31 + months(0:11) + days(31)
})