|
1 | 1 | #' Quality Control Check on User Data Against Master Taxa List |
2 | 2 | #' |
3 | | -#' This function compares the user's data frame to a data frame with the |
4 | | -#' official (or user supplied) master taxa list (benthic macroinvertebrates). |
| 3 | +#' This function has been deprecated (March 2026). |
5 | 4 | #' |
6 | | -#' Output is a data frame with matches. |
| 5 | +#' The new function is qc_taxa_match_official. |
7 | 6 | #' |
8 | | -#' Messages are output to the console with the number of matches and which user |
9 | | -#' taxa did not match the official list. |
10 | | -#' |
11 | | -#' The official list is stored online but the user can input their own saved |
12 | | -#' copy. |
13 | | -#' |
14 | | -#' Any columns in the user input file that match the official master taxa list |
15 | | -#' will be renamed with the "_NonOfficial" suffix. |
16 | | -#' |
17 | | -#' New/different taxa in the user data are handled by the 'useOfficialTaxaInfo' |
18 | | -#' parameter. For taxa that did not match the master taxa list the user has |
19 | | -#' options on how to handle the differences for the phylogeny (e.g., columns for |
20 | | -#' phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit, |
21 | | -#' tolerance value, etc.). The options are below. |
22 | | -#' |
23 | | -#' * only_official = use only official master taxa information. Any |
24 | | -#' non-matching taxa will not have any master taxa information. |
25 | | -#' |
26 | | -#' * only_user = only use the information provided by the user. Information |
27 | | -#' from the 'Official' will not be used. This should only be used for |
28 | | -#' non-official calculations. |
29 | | -#' |
30 | | -#' * add_new = hybrid approach that uses official master taxa information, when |
31 | | -#' present, but includes user information for non-matching taxa if the column |
32 | | -#' names match. |
33 | | -#' |
34 | | -#' Default master taxa lists are saved as CSV files online at: |
35 | | -#' |
36 | | -#' https://github.com/leppott/MBSStools_SupportFiles |
37 | | -#' |
38 | | -#' The files can be downloaded with the following code. |
39 | | -#' |
40 | | -#' **Benthic Macroinvertebrate** |
41 | | -#' |
42 | | -#' url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" |
43 | | -#' df_mt_bugs <- read.csv(url_mt_bugs) |
44 | | -#' |
45 | | -#' The master taxa files are periodically updated. Update dates will be logged |
46 | | -#' on the GitHub repository. |
47 | | -#' |
48 | | -#' Expected fields include: |
49 | | -#' |
50 | | -#' **Benthic Macroinvertebrates** |
51 | | -#' |
52 | | -#' + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG, |
53 | | -#' FAM_TV, Habit, FinalTolVal07, Comment |
| 7 | +#' This function exists only as a wrapper to avoid breaking older code. |
54 | 8 | #' |
55 | 9 | #' @param DF_User User taxa data. |
56 | 10 | #' @param DF_Official Official master taxa list. Can be a local file or |
@@ -94,198 +48,11 @@ qc_taxa <- function(DF_User, |
94 | 48 | DF_Official = NULL, |
95 | 49 | fun.Community = NULL, |
96 | 50 | useOfficialTaxaInfo = "only_Official") { |
97 | | - ##FUNCTION ~ mastertaxa ~START |
98 | | - # |
99 | | - boo_DEBUG <- FALSE |
100 | | - if(boo_DEBUG==TRUE){##IF~boo_DEBUG~START |
101 | | - # # # Bugs |
102 | | - # DF_User<- taxa_bugs_genus |
103 | | - # DF_Official = NULL |
104 | | - # fun.Community = "bugs" |
105 | | - # useOfficialTaxaInfo = "only_Official" |
106 | | - # # |
107 | | - }##IF~boo_DEBUG~END |
108 | | - |
109 | | - # Col Suffixes |
110 | | - sfx_Official <- "_Official" |
111 | | - sfx_NonOfficial <- "_NonOfficial" |
112 | | - |
113 | | - # QC |
114 | | - ## inputs as data frames (just in case have a tibble) |
115 | | - DF_User <- data.frame(DF_User) |
116 | | - # DF_Official handled when checking URL |
117 | | - ## Community, convert community to lowercase |
118 | | - fun.Community <- tolower(fun.Community) |
119 | | - |
120 | | - # Taxa list, official |
121 | | - # run the proper sub function |
122 | | - if (fun.Community == "bugs") {##IF.START |
123 | | - url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" |
124 | | - col_mt <- c("Taxon", |
125 | | - "Phylum", |
126 | | - "Class", |
127 | | - "Order", |
128 | | - "Family", |
129 | | - "Genus", |
130 | | - "Other_Taxa", |
131 | | - "Tribe", |
132 | | - "FFG", |
133 | | - "FAM_TV", |
134 | | - "Habit", |
135 | | - "FinalTolVal07", |
136 | | - "Comment") |
137 | | - col_taxon <- col_mt[1] |
138 | | - # } else if(fun.Community == "fish"){ |
139 | | - # url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Fish.csv" |
140 | | - # col_mt <- c("SPECIES", "TYPE", "PTOLR", "NATIVE", "TROPHIC", "SILT" |
141 | | - # , "PIRHALLA","DATE.ADDED", "REASON", "SOURCE", "FAM", "GENUS" |
142 | | - # , "SP_SCI", "IN_KEY", "APPROX_ID" ) |
143 | | - # col_taxon <- col_mt[1] |
144 | | - # future functionality |
145 | | - } else { |
146 | | - msg <- "Valid values for fun.Community is only 'bugs'." |
147 | | - stop(msg) |
148 | | - }##IF ~ fun.community ~ END |
149 | | - |
150 | | - # Master Taxa |
151 | | - # Download "official" list if none provided |
152 | | - if(is.null(DF_Official)){ |
153 | | - # 404 Error if file not found |
154 | | - df_mt <- utils::read.csv(url_mt) |
155 | | - } else { |
156 | | - df_mt <- data.frame(DF_Official) |
157 | | - }## IF ~ is.null(DF_Official) ~ END |
158 | | - |
159 | | - # Names to upper case |
160 | | - names(DF_User) <- toupper(names(DF_User)) |
161 | | - names(df_mt) <- toupper(names(df_mt)) |
162 | | - # col_mt <- toupper(col_mt) |
163 | | - col_taxon <- toupper(col_taxon) |
164 | | - |
165 | | - # QC check for col_taxon |
166 | | - if (!col_taxon %in% names(DF_User)) { |
167 | | - stop(paste0("DF_User missing column; ", col_taxon)) |
168 | | - } ## IF, stop |
169 | | - |
170 | | - # taxa names to ALL CAPS for bugs and fish |
171 | | - DF_User[, col_taxon] <- toupper(DF_User[, col_taxon]) |
172 | | - |
173 | | - # Check Numbers |
174 | | - taxa_user <- sort(unique(DF_User[, col_taxon])) |
175 | | - taxa_user_n <- length(taxa_user) |
176 | | - boo_taxa_match <- taxa_user %in% df_mt[, col_taxon] |
177 | | - sum_taxa_match <- sum(boo_taxa_match) |
178 | | - taxa_nonmatch <- taxa_user[!boo_taxa_match] |
179 | | - # Output to Console |
180 | | - msg <- paste0("Taxa match, ", sum_taxa_match, " / ", taxa_user_n) |
181 | | - message(msg) |
182 | | - # Inform user of the non-matches |
183 | | - if(sum_taxa_match != taxa_user_n){ |
184 | | - n_nonmatch <- taxa_user_n - sum_taxa_match |
185 | | - str_tax <- ifelse(n_nonmatch == 1, "taxon", "taxa") |
186 | | - msg_1 <- paste0("The following user ", |
187 | | - str_tax, |
188 | | - " (", |
189 | | - n_nonmatch, |
190 | | - "/", |
191 | | - taxa_user_n, |
192 | | - ") did not match the master list.\n") |
193 | | - msg_2 <- paste0(taxa_nonmatch, collapse = "\n") |
194 | | - message(paste0(msg_1, msg_2)) |
195 | | - }##IF ~ non-matches ~ END |
196 | | - |
197 | | - |
198 | | - |
199 | | - # Merge and Munge Columns |
200 | | - ## Columns |
201 | | - # col_mt_nonTaxon <- col_mt[!(col_mt %in% col_taxon)] |
202 | | - # col_mt_nonOfficial <- paste0(col_mt_nonTaxon, sfx_NonOfficial) |
203 | | - # boo_col_match <- colnames(DF_User) %in% col_mt_nonTaxon |
204 | | - # col_mod <- colnames(DF_User)[boo_col_match] |
205 | | - ## Rename matching columns before merge |
206 | | - #names(DF_User)[boo_col_match] <- paste0(names(DF_User)[boo_col_match] |
207 | | - # , "_NonOfficial") |
208 | | - # more control than using suffixes in merge() |
209 | 51 | # |
210 | | - ## Merge |
211 | | - # df_merge <- merge(DF_User, df_mt |
212 | | - # , by = col_taxon |
213 | | - # , all.x = TRUE) |
214 | | - ## Munge Cols |
215 | | - if(useOfficialTaxaInfo == "only_Official"){ |
216 | | - # Do Nothing |
217 | | - # leave in "_NonOfficial" columns |
218 | | - df_result <- merge(DF_User, df_mt, |
219 | | - by = col_taxon, |
220 | | - all.x = TRUE, |
221 | | - suffixes = c(sfx_NonOfficial, "")) |
222 | | - |
223 | | - #names(df_result) <- gsub(".x$", "", names(df_result)) |
224 | | - |
225 | | - # df_result <- dplyr::left_join(DF_User, df_mt |
226 | | - # , by = col_taxon |
227 | | - # , suffix = c(sfx_NonOfficial, "")) |
228 | | - |
229 | | - } else if(useOfficialTaxaInfo == "only_user"){ |
230 | | - # Reverse and keep _NonOfficial and remove official field |
231 | | - # # Remove Official Cols |
232 | | - # col_keep <- !(names(df_merge) %in% col_mod) |
233 | | - # df_result <- df_merge[, col_keep] |
234 | | - # # Revert "_NonOfficial" |
235 | | - # names(df_result) <- gsub("_NonOfficial$", "", names(df_result)) |
236 | | - |
237 | | - df_result <- merge(DF_User, df_mt, |
238 | | - by = col_taxon, |
239 | | - all.x = TRUE, |
240 | | - suffixes = c("", sfx_Official)) |
241 | | - |
242 | | - |
243 | | - # df_result <- dplyr::left_join(DF_User, df_mt |
244 | | - # , by = col_taxon |
245 | | - # , suffix = c("", sfx_Official)) |
246 | | - |
247 | | - } else if(useOfficialTaxaInfo == "add_new"){ |
248 | | - # add user info for new taxa to official columns |
249 | | - # df_result <- df_merge |
250 | | - # df_merge[df_merge[, col_taxon] == taxa_nonmatch, col_mod] <- |
251 | | - # df_merge[df_merge[, col_taxon] == taxa_nonmatch, paste0(col_mod |
252 | | - # , "_NonOfficial")] |
253 | | - |
254 | | - df_result <- merge(DF_User, df_mt, |
255 | | - by = col_taxon, |
256 | | - all.x = TRUE, |
257 | | - suffixes = c(sfx_NonOfficial, "")) |
258 | | - |
259 | | - # df_result <- dplyr::left_join(DF_User, df_mt |
260 | | - # , by = col_taxon |
261 | | - # , suffix = c(sfx_NonOfficial, "")) |
262 | | - |
263 | | - col_match_y <- names(df_result)[grepl(paste0(sfx_NonOfficial,"$") |
264 | | - , names(df_result))] |
265 | | - col_match_x <- gsub(paste0(sfx_NonOfficial,"$"), "", col_match_y) |
266 | | - df_result[df_result[, col_taxon] == taxa_nonmatch, col_match_x] <- |
267 | | - df_result[df_result[, col_taxon] == taxa_nonmatch, col_match_y] |
268 | | - |
269 | | - } else { |
270 | | - # Stop if wrong values |
271 | | - msg <- "Valid values for useOfficialTaxaInfo are |
272 | | - 'only_Official', 'only_user', or 'add_new'." |
273 | | - stop(msg) |
274 | | - } |
275 | | - |
276 | | - # QC |
277 | | - ## Missing Columns |
278 | | - |
279 | | - ## Valid values |
280 | | - # Bugs = "FFG", "FAM_TV", "Habit", "FinalTolVal07" |
281 | | - # Fish = TYPE, PTROLR, TROPHIC |
282 | | - |
283 | | - # Other columns for metric calculation |
284 | | - # Bugs = EXCLUDE, STRATA_R |
285 | | - # Fish = |
286 | | - |
287 | | - |
288 | | - # Output |
289 | | - return(df_result) |
| 52 | + .Deprecated("qc_taxa") |
| 53 | + qc_taxa(DF_User, |
| 54 | + DF_Official, |
| 55 | + fun.Community, |
| 56 | + useOfficialTaxaInfo) |
290 | 57 | # |
291 | 58 | }##FUNCTION ~ qc_taxa ~ END |
0 commit comments