5 Methods

5.1 stayCALM Package Installation

You can install the development version from GitHub with:

# install.packages("devtools")
devtools::install_github("BWAM/stayCALM")

You must have rtools installed on your machine for the stayCALM to be built upon installation from GitHub. rtools is NOT an R package. On Windows machines, rtools can be installed from the following CRAN repository: https://cran.r-project.org/bin/windows/Rtools/.

5.2 Prepare the R Environment

Load the necessary R packages into the global environment, including the stayCALM package.

library(tidyverse)
library(stayCALM)

5.3 Preprocess Data

5.3.1 Water Quality Standards

Data was preprocessed to resemble the expected output from the athorotative databases that will become available as part of the Data Modernization effort Data Modernization.

data(nysdec_wqs)

fresh - fresh surface waters
- D > C > B > A > A-Special > AA > AA-Special
- D > C > C(T) > C(TS) > B > B(T) > B(TS) > A > A(T) > A(TS) > A-Special > A-Special(T) > A-Special(TS) > AA > AA(T) > AA(TS) > AA-Special > AA-Special(T) > AA-Special(TS)
saline - saline surface waters
- SD > I > SC > SB > SA

wqs.df <- purrr::map_df(c("fresh", "saline", "saline_i"), function(type.i) {
  class.vec <- switch(type.i,
                      "fresh" = c("D", class_variants(.class_vec = c("C", "B",
                                                                     "A", "A-Special",
                                                                     "AA", "AA-Special"),
                                                      .variant_vec = c("", "(T)", "(TS)"))),
                      "saline" = c("SD", "I", "SC", "SB", "SA"))
                      #"saline_i" = "I")
  
  wqs_sub.df <- nysdec_wqs[nysdec_wqs$class_type == type.i, ]
  
  
  final.df <- class_inheritance(.data = wqs_sub.df,
                                parameter,
                                .class_col = class,
                                .levels_vec = class.vec) %>% 
    dplyr::mutate(class = as.character(class))
    
})

5.3.2 WI/PWL

Data was preprocessed to resemble the expected output from the athorotative databases that will become available as part of the Data Modernization effort Data Modernization.

data("wipwl.df")

wqs_wipwl.df <- wipwl.df %>%
  select(seg_id, class, tmdl_id, mgmtstat) %>% 
  distinct() %>%
  left_join(wqs.df, by = "class")

5.3.3 Stream Data

Data was preprocessed to resemble the expected output from the athorotative databases that will become available as part of the Data Modernization effort Data Modernization.

data(stream_chem.df)

chem.df <- stream_chem.df %>% 
  rename_all(tolower) %>% 
  rename(seg_id = sh_pwl_id,
         parameter = chemical_name) %>% 
  inner_join(wqs_wipwl.df, by = c("seg_id", "parameter"))

chem_final.df <- chem.df %>% 
  mutate(result_numeric = if_else(is.na(result_numeric) & !is.na(validation_level),
                                  reporting_detection_limit,
                                  result_numeric)) %>% 
    filter(!validation_level %in% "Not Reported",
         !is.na(result_numeric))

5.4 Assess

Thresholds represented by NA will throw an error. The majority of the NAs in the threshold column now need to be calculated– a task I have not standardized yet.

chem_final.df <- chem_final.df %>%
  filter(!is.na(threshold))

assessed.df <- chem_final.df %>%
  # Drop this filter after additional prep steps have been added
  filter(is.na(formula) & statistic %in% "none" & block %in% "single") %>% 
  date_standard_cols(.date_col = sample_date) %>% 
  assessment_id(seg_id,
                lab_anl_method_name,
                cas_rn,
                parameter) %>%
  mutate(within_period  = assessment_period(.date_vec = date,
                                            .n_years_ago = 10),
         attaining_wqs = wqs_attaining(result_numeric,
                                       direction,
                                       threshold,
                                       na.rm = TRUE),
         attaining_75 = wqs_attaining(result_numeric,
                                      direction,
                                      threshold,
                                      na.rm = TRUE)) %>% 
  group_by(assessment_id, within_period) %>% 
  summarize(
    wqs_violation = any(attaining_wqs == FALSE),
    wqs_75_violation = any(attaining_75 == FALSE),
    min_years_samples = length(unique(year)) >= 2 & length(assessment_id) >= 8,
    min_violations_year = sum(tapply(attaining_wqs == FALSE, year, sum) > 0),
    tmdl = FALSE,
    ltco_rest_plan = FALSE,
    pollutant = FALSE 
  )

test.df <- assessed.df %>% 
  mutate(
    assessment = case_when(
      !within_period ~ "Manual Review Required: Manual Review Required: Manual Review Required",
      !min_years_samples & wqs_violation ~ "IR3: Impaired: Unconfirmed",
      !min_years_samples & !wqs_violation & wqs_75_violation  ~ "IR3: Fully Supported: Unconfirmed",
      !min_years_samples & !wqs_violation & !wqs_75_violation  ~ "IR3: Stressed: Unconfirmed",
      min_years_samples & wqs_violation & wqs_75_violation  ~ "IR1: Fully Supported: Confirmed",
      min_years_samples & wqs_violation & !wqs_75_violation  ~ "IR1: Stressed: Confirmed",
      min_years_samples & !wqs_violation & !min_violations_year  ~ "IR3: Impaired: Unconfirmed",
      min_years_samples & !wqs_violation & 
        min_violations_year & tmdl  ~ "IR4a: Impaired: Confirmed",
      min_years_samples & !wqs_violation &
        min_violations_year & !tmdl & ltco_rest_plan  ~ "IR4b: Impaired: Unconfirmed",
      min_years_samples & !wqs_violation &
        min_violations_year & !tmdl & !ltco_rest_plan & pollutant  ~ "IR4c: Impaired: Confirmed",
      min_years_samples & !wqs_violation &
        min_violations_year & !tmdl & !ltco_rest_plan & !pollutant  ~ "IR5: Impaired: Confirmed",
      TRUE ~ "ERROR"
    )
  )

# table(test.df$assessment)