Some Quick Baseball Stats

Author

Derek Sollberger

Published

September 17, 2023

Lahman

library("gt")
library("Lahman")
library("tidyverse")
# Keep only the rows of Lahman's `Teams` table with yearID >= 2004
# (the export's file name indicates the data run through 2022).
df <- filter(Teams, yearID >= 2004)

# Save the subset as a CSV so it can be reused outside this session.
readr::write_csv(df, "baseball_2004_2022.csv")

# List the columns available in the subset.
names(df)
 [1] "yearID"         "lgID"           "teamID"         "franchID"      
 [5] "divID"          "Rank"           "G"              "Ghome"         
 [9] "W"              "L"              "DivWin"         "WCWin"         
[13] "LgWin"          "WSWin"          "R"              "AB"            
[17] "H"              "X2B"            "X3B"            "HR"            
[21] "BB"             "SO"             "SB"             "CS"            
[25] "HBP"            "SF"             "RA"             "ER"            
[29] "ERA"            "CG"             "SHO"            "SV"            
[33] "IPouts"         "HA"             "HRA"            "BBA"           
[37] "SOA"            "E"              "DP"             "FP"            
[41] "name"           "park"           "attendance"     "BPF"           
[45] "PPF"            "teamIDBR"       "teamIDlahman45" "teamIDretro"   
# Ten largest team home-run totals (HR) among the rows of `df`.
# Only the season, franchise, and HR columns are kept for display.
# Note: slice_max() keeps ties by default, so a tie at the cutoff
# would yield more than ten rows.
hr_leaders <- df |>
  select(yearID, franchID, HR) |>
  slice_max(order_by = HR, n = 10)

# Render the result as a formatted gt table.
gt(hr_leaders)
yearID franchID HR
2019 MIN 307
2019 NYY 306
2019 HOU 288
2019 LAD 279
2018 NYY 267
2021 TOR 262
2005 TEX 260
2010 TOR 257
2019 OAK 257
2019 CHC 256
# Ten lowest team ERA values among the rows of `df`.
# Only the season, franchise, and ERA columns are kept for display.
# Note: slice_min() keeps ties by default, so a tie at the cutoff
# would yield more than ten rows.
era_leaders <- df |>
  select(yearID, franchID, ERA) |>
  slice_min(order_by = ERA, n = 10)

# Render the result as a formatted gt table.
gt(era_leaders)
yearID franchID ERA
2022 LAD 2.80
2022 HOU 2.90
2015 STL 2.94
2021 LAD 3.01
2011 PHI 3.02
2020 LAD 3.02
2014 WSN 3.03
2018 HOU 3.11
2016 CHC 3.15
2014 SEA 3.17