library("tidyverse")
# Read both Flint lead data sets straight from the TidyTuesday repository
# (2025-11-04 release): the MDEQ samples and the independent VT samples.
flint_mdeq <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-11-04/flint_mdeq.csv"
)
flint_vt <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-11-04/flint_vt.csv"
)
Tidy Tuesday
The TidyTuesday data set for November 4, 2025, was about the Flint water crisis. I want to summarize some of the findings for my data science course.
Summary statistics
All MDEQ Samples
# Quartiles, mean and range of the lead readings (ppb) over all MDEQ samples.
summary(flint_mdeq$lead)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00 2.00 3.00 7.31 6.50 104.00
Outliers Removed
# Same summary for lead2, the "outliers removed" column; the output below
# reports 2 NA's, i.e. two samples are missing from this version.
summary(flint_mdeq$lead2)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.000 2.000 3.000 5.725 6.000 42.000 2
Independent Test
# Quartiles, mean and range of the lead readings (ppb) in the independent
# VT samples.
summary(flint_vt$lead)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.344 1.578 3.521 10.646 9.050 158.000
# Sample means of lead (ppb); missing values are dropped before averaging.
#   mu1_mdeq: all MDEQ samples
#   mu2_mdeq: MDEQ samples with the outliers removed (lead2)
#   mu_vt:    independent VT samples
mu1_mdeq <- flint_mdeq$lead |> mean(na.rm = TRUE)
mu2_mdeq <- flint_mdeq$lead2 |> mean(na.rm = TRUE)
mu_vt <- flint_vt$lead |> mean(na.rm = TRUE)

# 90th percentiles of the same three columns, drawn as the red reference
# line in the histograms. quantile() returns a length-one vector named "90%".
top10_1_mdeq <- flint_mdeq$lead |> quantile(probs = 0.90, na.rm = TRUE)
top10_2_mdeq <- flint_mdeq$lead2 |> quantile(probs = 0.90, na.rm = TRUE)
top10_vt <- flint_vt$lead |> quantile(probs = 0.90, na.rm = TRUE)
Histograms
All MDEQ Samples
# Histogram of every MDEQ lead reading in 5-ppb bins, with dashed reference
# lines at the sample mean (blue) and the 90th percentile (red).
ggplot(flint_mdeq) +
  geom_histogram(
    mapping = aes(x = lead),
    binwidth = 5,
    fill = "gray75",
    color = "black"
  ) +
  geom_vline(
    xintercept = mu1_mdeq,
    linetype = "dashed",
    linewidth = 2,
    color = "blue"
  ) +
  geom_vline(
    xintercept = top10_1_mdeq,
    linetype = "dashed",
    linewidth = 2,
    color = "red"
  ) +
  labs(
    x = "lead (ppb)",
    y = "number of samples",
    title = "Flint Water Samples in 2015",
    subtitle = "MDEQ measurements (average in blue, top 10 percentile in red)",
    caption = "Source: Jen Richmond"
  ) +
  theme_minimal()
Outliers Removed
# Histogram of the MDEQ readings with the outliers removed (lead2), in 5-ppb
# bins. The two missing lead2 values are dropped by stat_bin(), which is what
# triggers the warning printed below the plot.
flint_mdeq |>
  ggplot() +
  geom_histogram(aes(x = lead2),
                 binwidth = 5, color = "black", fill = "gray75") +
  # Blue line: mean of lead2 (mu2_mdeq), so the average matches the data
  # being plotted rather than the mean of the full lead column.
  geom_vline(xintercept = mu2_mdeq, color = "blue",
             linetype = 2, linewidth = 2) +
  # Red line: 90th percentile of lead2.
  geom_vline(xintercept = top10_2_mdeq, color = "red",
             linetype = 2, linewidth = 2) +
  labs(title = "Flint Water Samples in 2015",
       subtitle = "MDEQ measurements, two samples missing (average in blue, top 10 percentile in red)",
       caption = "Source: Jen Richmond",
       x = "lead (ppb)", y = "number of samples") +
  theme_minimal()
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_bin()`).

Independent Test
# Histogram of the independent VT lead readings in 5-ppb bins, with dashed
# reference lines at the VT mean (blue) and the VT 90th percentile (red).
flint_vt |>
  ggplot() +
  geom_histogram(aes(x = lead),
                 binwidth = 5, color = "black", fill = "gray75") +
  # Blue line: mean of the VT samples (mu_vt), not the MDEQ mean, so both
  # reference lines describe the data set shown in this plot.
  geom_vline(xintercept = mu_vt, color = "blue",
             linetype = 2, linewidth = 2) +
  # Red line: 90th percentile of the VT samples.
  geom_vline(xintercept = top10_vt, color = "red",
             linetype = 2, linewidth = 2) +
  labs(title = "Flint Water Samples in 2015",
       subtitle = "VT measurements (average in blue, top 10 percentile in red)",
       caption = "Source: Jen Richmond",
       x = "lead (ppb)", y = "number of samples") +
  theme_minimal()
Diagnoses
“[The action level for lead under] the Lead and Copper Rule (LCR) of 1991 is 15 parts per billion (ppb). If this is exceeded in more than 10% of homes tested (or if the 90th percentile value of the total sample is above 15 ppb), action is required.” — Significance, Vol 14, Issue 2
All MDEQ Samples
# LCR check on all MDEQ samples: action is required when the 90th percentile
# of lead exceeds 15 ppb. The result keeps the "90%" name from quantile().
ifelse(
  test = quantile(flint_mdeq$lead, probs = 0.90, na.rm = TRUE) > 15,
  yes = "action required",
  no = "safe water"
)
90%
"action required"
Outliers Removed
# LCR check on the MDEQ samples with outliers removed (lead2): action is
# required when the 90th percentile exceeds 15 ppb. Missing values are
# dropped before the percentile is taken.
ifelse(
  test = quantile(flint_mdeq$lead2, probs = 0.90, na.rm = TRUE) > 15,
  yes = "action required",
  no = "safe water"
)
90%
"safe water"
Independent Test
# LCR check on the independent VT samples: action is required when the 90th
# percentile of lead exceeds 15 ppb.
ifelse(
  test = quantile(flint_vt$lead, probs = 0.90, na.rm = TRUE) > 15,
  yes = "action required",
  no = "safe water"
)
90%
"action required"
Coda
Note: Session Info
# Print the R version, platform, locale and package versions of this session.
sessionInfo()
R version 4.5.2 (2025-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 10 x64 (build 19045)
Matrix products: default
LAPACK version 3.12.1
locale:
[1] LC_COLLATE=English_United States.utf8
[2] LC_CTYPE=English_United States.utf8
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.utf8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[5] purrr_1.1.0 readr_2.1.5 tidyr_1.3.1 tibble_3.3.0
[9] ggplot2_4.0.0 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] bit_4.6.0 gtable_0.3.6 jsonlite_2.0.0 crayon_1.5.3
[5] compiler_4.5.2 tidyselect_1.2.1 parallel_4.5.2 scales_1.4.0
[9] yaml_2.3.10 fastmap_1.2.0 R6_2.6.1 labeling_0.4.3
[13] generics_0.1.4 curl_6.4.0 knitr_1.50 htmlwidgets_1.6.4
[17] pillar_1.11.0 RColorBrewer_1.1-3 tzdb_0.5.0 rlang_1.1.6
[21] stringi_1.8.7 xfun_0.52 S7_0.2.0 bit64_4.6.0-1
[25] timechange_0.3.0 cli_3.6.5 withr_3.0.2 magrittr_2.0.3
[29] digest_0.6.37 grid_4.5.2 vroom_1.6.5 rstudioapi_0.17.1
[33] hms_1.1.3 lifecycle_1.0.4 vctrs_0.6.5 evaluate_1.0.4
[37] glue_1.8.0 farver_2.1.2 rmarkdown_2.29 tools_4.5.2
[41] pkgconfig_2.0.3 htmltools_0.5.8.1