Skip to content

Commit 830a3ec

Browse files
authored
Merge pull request #135 from leppott/dev_qc_taxa
Dev qc taxa
2 parents b245d32 + e68d838 commit 830a3ec

15 files changed

Lines changed: 847 additions & 293 deletions

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: BioMonTools
22
Type: Package
33
Title: Biomonitoring and Bioassessment Calculations
4-
Version: 1.2.4.9008
4+
Version: 1.2.4.9012
55
Authors@R: c(
66
person("Erik W.", "Leppo",
77
email="Erik.Leppo@tetratech.com",

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ export(metric.values.fish)
1414
export(metvalgrpxl)
1515
export(qc.checks)
1616
export(qc_taxa)
17+
export(qc_taxa_match_official)
18+
export(qc_taxa_values_ffg)
19+
export(qc_taxa_values_habit)
20+
export(qc_taxa_values_tolval)
1721
export(rarify)
1822
export(taxa_translate)
1923
importFrom(rlang,.data)

NEWS

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,29 @@ NEWS
44

55
<!-- NEWS.md is generated from NEWS.Rmd. Please edit that file -->
66

7-
#> Last Update: 2026-03-18 20:26:19.353336
7+
#> Last Update: 2026-03-18 22:51:30.206983
88

99
# Version History
1010

11+
## Changes in version 1.2.4.9012 (2026-03-18)
12+
13+
- feature: Add qc_taxa_values_tolval function
14+
15+
## Changes in version 1.2.4.9011 (2026-03-18)
16+
17+
- refactor: Add default column name to qc_taxa_values_ffg
18+
- feature: Add qc_taxa_values_habit function
19+
20+
## Changes in version 1.2.4.9010 (2026-03-18)
21+
22+
- feature: Add qc_taxa_values_ffg function
23+
24+
## Changes in version 1.2.4.9009 (2026-03-18)
25+
26+
- deprecate: Change qc_taxa to qc_taxa_match_official
27+
- Will be removed in a future version
28+
- refactor: Add qc_taxa_match_official and update with new name
29+
1130
## Changes in version 1.2.4.9008 (2026-03-18)
1231

1332
- test: Add test for metric.values for collapsing, bugs and fish, Issue

NEWS.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,29 @@ NEWS
44

55
<!-- NEWS.md is generated from NEWS.Rmd. Please edit that file -->
66

7-
#> Last Update: 2026-03-18 20:26:19.353336
7+
#> Last Update: 2026-03-18 22:51:30.206983
88

99
# Version History
1010

11+
## Changes in version 1.2.4.9012 (2026-03-18)
12+
13+
- feature: Add qc_taxa_values_tolval function
14+
15+
## Changes in version 1.2.4.9011 (2026-03-18)
16+
17+
- refactor: Add default column name to qc_taxa_values_ffg
18+
- feature: Add qc_taxa_values_habit function
19+
20+
## Changes in version 1.2.4.9010 (2026-03-18)
21+
22+
- feature: Add qc_taxa_values_ffg function
23+
24+
## Changes in version 1.2.4.9009 (2026-03-18)
25+
26+
- deprecate: Change qc_taxa to qc_taxa_match_official
27+
- Will be removed in a future version
28+
- refactor: Add qc_taxa_match_official and update with new name
29+
1130
## Changes in version 1.2.4.9008 (2026-03-18)
1231

1332
- test: Add test for metric.values for collapsing, bugs and fish, Issue

NEWS.rmd

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,25 @@ cat(paste0("Last Update: ",Sys.time()))
1818

1919
# Version History
2020

21+
## Changes in version 1.2.4.9012 (2026-03-18)
22+
23+
* feature: Add qc_taxa_values_tolval function
24+
25+
## Changes in version 1.2.4.9011 (2026-03-18)
26+
27+
* refactor: Add default column name to qc_taxa_values_ffg
28+
* feature: Add qc_taxa_values_habit function
29+
30+
## Changes in version 1.2.4.9010 (2026-03-18)
31+
32+
* feature: Add qc_taxa_values_ffg function
33+
34+
## Changes in version 1.2.4.9009 (2026-03-18)
35+
36+
* deprecate: Change qc_taxa to qc_taxa_match_official
37+
+ Will be removed in a future version
38+
* refactor: Add qc_taxa_match_official and update with new name
39+
2140
## Changes in version 1.2.4.9008 (2026-03-18)
2241

2342
* test: Add test for metric.values for collapsing, bugs and fish, Issue #131

R/qc_taxa.R

Lines changed: 8 additions & 241 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,10 @@
11
#' Quality Control Check on User Data Against Master Taxa List
22
#'
3-
#' This function compares the user's data frame to a data frame with the
4-
#' official (or user supplied) master taxa list (benthic macroinvertebrates).
3+
#' This function has been deprecated (March 2026).
54
#'
6-
#' Output is a data frame with matches.
5+
#' The new function is qc_taxa_match_official.
76
#'
8-
#' Messages are output to the console with the number of matches and which user
9-
#' taxa did not match the official list.
10-
#'
11-
#' The official list is stored online but the user can input their own saved
12-
#' copy.
13-
#'
14-
#' Any columns in the user input file that match the official master taxa list
15-
#' will be renamed with the "_NonOfficial" suffix.
16-
#'
17-
#' New/different taxa in the user data are handled by the 'useOfficialTaxaInfo'
18-
#' parameter. For taxa that did not match the master taxa list the user has
19-
#' options on how to handle the differences for the phylogeny (e.g., columns for
20-
#' phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit,
21-
#' tolerance value, etc.). The options are below.
22-
#'
23-
#' * only_official = use only official master taxa information. Any
24-
#' non-matching taxa will not have any master taxa information.
25-
#'
26-
#' * only_user = only use the information provided by the user. Information
27-
#' from the 'Official' will not be used. This should only be used for
28-
#' non-official calculations.
29-
#'
30-
#' * add_new = hybrid approach that uses official master taxa information, when
31-
#' present, but includes user information for non-matching taxa if the column
32-
#' names match.
33-
#'
34-
#' Default master taxa lists are saved as CSV files online at:
35-
#'
36-
#' https://github.com/leppott/MBSStools_SupportFiles
37-
#'
38-
#' The files can be downloaded with the following code.
39-
#'
40-
#' **Benthic Macroinvertebrate**
41-
#'
42-
#' url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv"
43-
#' df_mt_bugs <- read.csv(url_mt_bugs)
44-
#'
45-
#' The master taxa files are periodically updated. Update dates will be logged
46-
#' on the GitHub repository.
47-
#'
48-
#' Expected fields include:
49-
#'
50-
#' **Benthic Macroinvertebrates**
51-
#'
52-
#' + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG,
53-
#' FAM_TV, Habit, FinalTolVal07, Comment
7+
#' This function exists only as a wrapper to avoid breaking older code.
548
#'
559
#' @param DF_User User taxa data.
5610
#' @param DF_Official Official master taxa list. Can be a local file or
@@ -94,198 +48,11 @@ qc_taxa <- function(DF_User,
9448
DF_Official = NULL,
9549
fun.Community = NULL,
9650
useOfficialTaxaInfo = "only_Official") {
97-
##FUNCTION ~ mastertaxa ~START
98-
#
99-
boo_DEBUG <- FALSE
100-
if(boo_DEBUG==TRUE){##IF~boo_DEBUG~START
101-
# # # Bugs
102-
# DF_User<- taxa_bugs_genus
103-
# DF_Official = NULL
104-
# fun.Community = "bugs"
105-
# useOfficialTaxaInfo = "only_Official"
106-
# #
107-
}##IF~boo_DEBUG~END
108-
109-
# Col Suffixes
110-
sfx_Official <- "_Official"
111-
sfx_NonOfficial <- "_NonOfficial"
112-
113-
# QC
114-
## inputs as data frames (just in case have a tibble)
115-
DF_User <- data.frame(DF_User)
116-
# DF_Official handled when checking URL
117-
## Community, convert community to lowercase
118-
fun.Community <- tolower(fun.Community)
119-
120-
# Taxa list, official
121-
# run the proper sub function
122-
if (fun.Community == "bugs") {##IF.START
123-
url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv"
124-
col_mt <- c("Taxon",
125-
"Phylum",
126-
"Class",
127-
"Order",
128-
"Family",
129-
"Genus",
130-
"Other_Taxa",
131-
"Tribe",
132-
"FFG",
133-
"FAM_TV",
134-
"Habit",
135-
"FinalTolVal07",
136-
"Comment")
137-
col_taxon <- col_mt[1]
138-
# } else if(fun.Community == "fish"){
139-
# url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Fish.csv"
140-
# col_mt <- c("SPECIES", "TYPE", "PTOLR", "NATIVE", "TROPHIC", "SILT"
141-
# , "PIRHALLA","DATE.ADDED", "REASON", "SOURCE", "FAM", "GENUS"
142-
# , "SP_SCI", "IN_KEY", "APPROX_ID" )
143-
# col_taxon <- col_mt[1]
144-
# future functionality
145-
} else {
146-
msg <- "Valid values for fun.Community is only 'bugs'."
147-
stop(msg)
148-
}##IF ~ fun.community ~ END
149-
150-
# Master Taxa
151-
# Download "official" list if none provided
152-
if(is.null(DF_Official)){
153-
# 404 Error if file not found
154-
df_mt <- utils::read.csv(url_mt)
155-
} else {
156-
df_mt <- data.frame(DF_Official)
157-
}## IF ~ is.null(DF_Official) ~ END
158-
159-
# Names to upper case
160-
names(DF_User) <- toupper(names(DF_User))
161-
names(df_mt) <- toupper(names(df_mt))
162-
# col_mt <- toupper(col_mt)
163-
col_taxon <- toupper(col_taxon)
164-
165-
# QC check for col_taxon
166-
if (!col_taxon %in% names(DF_User)) {
167-
stop(paste0("DF_User missing column; ", col_taxon))
168-
} ## IF, stop
169-
170-
# taxa names to ALL CAPS for bugs and fish
171-
DF_User[, col_taxon] <- toupper(DF_User[, col_taxon])
172-
173-
# Check Numbers
174-
taxa_user <- sort(unique(DF_User[, col_taxon]))
175-
taxa_user_n <- length(taxa_user)
176-
boo_taxa_match <- taxa_user %in% df_mt[, col_taxon]
177-
sum_taxa_match <- sum(boo_taxa_match)
178-
taxa_nonmatch <- taxa_user[!boo_taxa_match]
179-
# Output to Console
180-
msg <- paste0("Taxa match, ", sum_taxa_match, " / ", taxa_user_n)
181-
message(msg)
182-
# Inform user of the non-matches
183-
if(sum_taxa_match != taxa_user_n){
184-
n_nonmatch <- taxa_user_n - sum_taxa_match
185-
str_tax <- ifelse(n_nonmatch == 1, "taxon", "taxa")
186-
msg_1 <- paste0("The following user ",
187-
str_tax,
188-
" (",
189-
n_nonmatch,
190-
"/",
191-
taxa_user_n,
192-
") did not match the master list.\n")
193-
msg_2 <- paste0(taxa_nonmatch, collapse = "\n")
194-
message(paste0(msg_1, msg_2))
195-
}##IF ~ non-matches ~ END
196-
197-
198-
199-
# Merge and Munge Columns
200-
## Columns
201-
# col_mt_nonTaxon <- col_mt[!(col_mt %in% col_taxon)]
202-
# col_mt_nonOfficial <- paste0(col_mt_nonTaxon, sfx_NonOfficial)
203-
# boo_col_match <- colnames(DF_User) %in% col_mt_nonTaxon
204-
# col_mod <- colnames(DF_User)[boo_col_match]
205-
## Rename matching columns before merge
206-
#names(DF_User)[boo_col_match] <- paste0(names(DF_User)[boo_col_match]
207-
# , "_NonOfficial")
208-
# more control than using suffixes in merge()
20951
#
210-
## Merge
211-
# df_merge <- merge(DF_User, df_mt
212-
# , by = col_taxon
213-
# , all.x = TRUE)
214-
## Munge Cols
215-
if(useOfficialTaxaInfo == "only_Official"){
216-
# Do Nothing
217-
# leave in "_NonOfficial" columns
218-
df_result <- merge(DF_User, df_mt,
219-
by = col_taxon,
220-
all.x = TRUE,
221-
suffixes = c(sfx_NonOfficial, ""))
222-
223-
#names(df_result) <- gsub(".x$", "", names(df_result))
224-
225-
# df_result <- dplyr::left_join(DF_User, df_mt
226-
# , by = col_taxon
227-
# , suffix = c(sfx_NonOfficial, ""))
228-
229-
} else if(useOfficialTaxaInfo == "only_user"){
230-
# Reverse and keep _NonOfficial and remove official field
231-
# # Remove Official Cols
232-
# col_keep <- !(names(df_merge) %in% col_mod)
233-
# df_result <- df_merge[, col_keep]
234-
# # Revert "_NonOfficial"
235-
# names(df_result) <- gsub("_NonOfficial$", "", names(df_result))
236-
237-
df_result <- merge(DF_User, df_mt,
238-
by = col_taxon,
239-
all.x = TRUE,
240-
suffixes = c("", sfx_Official))
241-
242-
243-
# df_result <- dplyr::left_join(DF_User, df_mt
244-
# , by = col_taxon
245-
# , suffix = c("", sfx_Official))
246-
247-
} else if(useOfficialTaxaInfo == "add_new"){
248-
# add user info for new taxa to official columns
249-
# df_result <- df_merge
250-
# df_merge[df_merge[, col_taxon] == taxa_nonmatch, col_mod] <-
251-
# df_merge[df_merge[, col_taxon] == taxa_nonmatch, paste0(col_mod
252-
# , "_NonOfficial")]
253-
254-
df_result <- merge(DF_User, df_mt,
255-
by = col_taxon,
256-
all.x = TRUE,
257-
suffixes = c(sfx_NonOfficial, ""))
258-
259-
# df_result <- dplyr::left_join(DF_User, df_mt
260-
# , by = col_taxon
261-
# , suffix = c(sfx_NonOfficial, ""))
262-
263-
col_match_y <- names(df_result)[grepl(paste0(sfx_NonOfficial,"$")
264-
, names(df_result))]
265-
col_match_x <- gsub(paste0(sfx_NonOfficial,"$"), "", col_match_y)
266-
df_result[df_result[, col_taxon] == taxa_nonmatch, col_match_x] <-
267-
df_result[df_result[, col_taxon] == taxa_nonmatch, col_match_y]
268-
269-
} else {
270-
# Stop if wrong values
271-
msg <- "Valid values for useOfficialTaxaInfo are
272-
'only_Official', 'only_user', or 'add_new'."
273-
stop(msg)
274-
}
275-
276-
# QC
277-
## Missing Columns
278-
279-
## Valid values
280-
# Bugs = "FFG", "FAM_TV", "Habit", "FinalTolVal07"
281-
# Fish = TYPE, PTROLR, TROPHIC
282-
283-
# Other columns for metric calculation
284-
# Bugs = EXCLUDE, STRATA_R
285-
# Fish =
286-
287-
288-
# Output
289-
return(df_result)
52+
.Deprecated("qc_taxa")
53+
qc_taxa(DF_User,
54+
DF_Official,
55+
fun.Community,
56+
useOfficialTaxaInfo)
29057
#
29158
}##FUNCTION ~ qc_taxa ~ END

0 commit comments

Comments
 (0)