library("gt")
library("Lahman")
library("tidyverse")
# Build the working data set from the Lahman `Teams` table, save a CSV copy,
# and list the available columns.
df <- Lahman::Teams |>
  # Keep seasons from 2004 onward (2004-2022 per the output file name below;
  # the last season actually present depends on the installed Lahman version).
  filter(yearID >= 2004)

# Write the filtered rows to a CSV file in the current working directory.
readr::write_csv(df, "baseball_2004_2022.csv")

# Show the column names available for the tables that follow.
colnames(df)
[1] "yearID" "lgID" "teamID" "franchID"
[5] "divID" "Rank" "G" "Ghome"
[9] "W" "L" "DivWin" "WCWin"
[13] "LgWin" "WSWin" "R" "AB"
[17] "H" "X2B" "X3B" "HR"
[21] "BB" "SO" "SB" "CS"
[25] "HBP" "SF" "RA" "ER"
[29] "ERA" "CG" "SHO" "SV"
[33] "IPouts" "HA" "HRA" "BBA"
[37] "SOA" "E" "DP" "FP"
[41] "name" "park" "attendance" "BPF"
[45] "PPF" "teamIDBR" "teamIDlahman45" "teamIDretro"
# Table of the team-seasons with the most home runs (HR) in `df`.
df |>
  select(yearID, franchID, HR) |> # keep only season, franchise and HR columns
  slice_max(HR, n = 10) |> # rows with the 10 largest HR values
  gt() # render the result as a formatted gt table
yearID | franchID | HR |
---|---|---|
2019 | MIN | 307 |
2019 | NYY | 306 |
2019 | HOU | 288 |
2019 | LAD | 279 |
2018 | NYY | 267 |
2021 | TOR | 262 |
2005 | TEX | 260 |
2010 | TOR | 257 |
2019 | OAK | 257 |
2019 | CHC | 256 |
# Table of the team-seasons with the lowest earned run average (ERA) in `df`.
df |>
  select(yearID, franchID, ERA) |> # keep only season, franchise and ERA columns
  slice_min(ERA, n = 10) |> # rows with the 10 smallest ERA values
  gt() # render the result as a formatted gt table
yearID | franchID | ERA |
---|---|---|
2022 | LAD | 2.80 |
2022 | HOU | 2.90 |
2015 | STL | 2.94 |
2021 | LAD | 3.01 |
2011 | PHI | 3.02 |
2020 | LAD | 3.02 |
2014 | WSN | 3.03 |
2018 | HOU | 3.11 |
2016 | CHC | 3.15 |
2014 | SEA | 3.17 |