Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
42bf377
test(red): add failing tests for impute_missing_scores and strategy f…
seabbs-bot Mar 30, 2026
9342685
feat: implement build_missing_grid, filter_missing_scores, and filter…
seabbs-bot Mar 30, 2026
8d81290
feat(green): implement impute_missing_scores and strategy factories
seabbs-bot Mar 30, 2026
3e1298d
style: fix lint warnings in impute tests
seabbs-bot Mar 30, 2026
b15c5cf
style: suppress lint warning for internal function reference
seabbs-bot Mar 30, 2026
6253cab
Rename filter_missing_scores() to filter_scores()
seabbs-bot Mar 30, 2026
26fd2d8
fix: pass compare param through to strategy functions
seabbs-bot Mar 30, 2026
1d50511
fix: update integration test to use renamed filter_scores
seabbs-bot Mar 30, 2026
06b65e7
style: fix indentation in impute test
seabbs-bot Mar 30, 2026
dbcddb2
refactor: rename models→include, fix review issues
seabbs-bot Mar 30, 2026
6ca383f
style: fix redundant_equals_linter in test
seabbs-bot Mar 30, 2026
797b926
docs: add vignette for handling missing forecasts
seabbs-bot Mar 30, 2026
048be7c
docs: add vignette and NEWS entry for missing scores handling
seabbs-bot Mar 30, 2026
3d266ab
docs: improve vignette clarity and fix review issues
seabbs-bot Mar 30, 2026
a84aea2
docs: rewrite vignette and add CLI messages to impute_missing_scores
seabbs-bot Mar 31, 2026
3fde364
style: put cli_inform message on single line
seabbs-bot Mar 31, 2026
3ff432a
fix: use cli::qty() for correct pluralisation in impute message
seabbs-bot Mar 31, 2026
3490100
docs: credit Kim et al (2026) as inspiration for missing scores handling
seabbs-bot Mar 31, 2026
e11bc62
docs: soften absolute claims in vignette prose
seabbs-bot Mar 31, 2026
eb1ebcd
test: cover missing metric columns and nonexistent ref model
seabbs-bot Mar 31, 2026
8856e8d
docs: add articles section to pkgdown config
seabbs-bot Mar 31, 2026
bf60b74
docs: rename articles group from Getting started to Articles
seabbs-bot Mar 31, 2026
12ef1a7
revert: remove articles section from pkgdown config
seabbs-bot Mar 31, 2026
d9aa0fd
docs: address vignette review TODOs
seabbs-bot Mar 31, 2026
3cfcfcc
test: add integration tests for filter and impute with real data
seabbs-bot Mar 31, 2026
22c790f
docs: address vignette TODOs, suppress test messages
seabbs-bot Mar 31, 2026
a7934c6
style: use bare i = in cli_inform to avoid nolint blocks
seabbs-bot Mar 31, 2026
7c88db2
style: remove unhelpful comment in build_missing_grid
seabbs-bot Mar 31, 2026
1803d9c
docs: add Post-process scores section to pkgdown reference
seabbs-bot Mar 31, 2026
cfdc3c4
test: verify impute_mean_score values, original rows unchanged, filte…
seabbs-bot Mar 31, 2026
1cf4beb
test: update vdiffr plot snapshots after merge from main
seabbs-bot Apr 1, 2026
f9f91f1
Merge branch 'main' into issue-1122-missing-scores
seabbs Apr 8, 2026
5da130a
Merge branch 'main' into issue-1122-missing-scores
seabbs Apr 20, 2026
8e380f8
Merge branch 'main' into issue-1122-missing-scores
seabbs Apr 20, 2026
1953157
Merge branch 'main' into issue-1122-missing-scores
seabbs Apr 20, 2026
0d3cf4b
refactor(filter-scores): split intersection strategy and tidy
seabbs-bot Apr 22, 2026
5a4beee
refactor(impute-scores): share summary helper and guard all-NA
seabbs-bot Apr 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ export(dispersion_quantile)
export(dispersion_sample)
export(dss_sample)
export(energy_score_multivariate)
export(filter_scores)
export(filter_to_include)
export(filter_to_intersection)
export(get_correlations)
export(get_coverage)
export(get_duplicate_forecasts)
Expand All @@ -88,6 +91,11 @@ export(get_grouping)
export(get_metrics)
export(get_pairwise_comparisons)
export(get_pit_histogram)
export(impute_mean_score)
export(impute_missing_scores)
export(impute_model_score)
export(impute_na_score)
export(impute_worst_score)
export(interval_coverage)
export(is_forecast)
export(is_forecast_binary)
Expand Down Expand Up @@ -178,12 +186,14 @@ importFrom(data.table,key)
importFrom(data.table,melt)
importFrom(data.table,nafill)
importFrom(data.table,rbindlist)
importFrom(data.table,set)
importFrom(data.table,setDT)
importFrom(data.table,setattr)
importFrom(data.table,setcolorder)
importFrom(data.table,setkeyv)
importFrom(data.table,setnames)
importFrom(data.table,setorderv)
importFrom(data.table,uniqueN)
importFrom(ggplot2,.data)
importFrom(ggplot2,`%+replace%`)
importFrom(ggplot2,aes)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# scoringutils (development version)

- Added `filter_scores()` and `impute_missing_scores()` for handling missing forecasts before summarisation. `filter_scores()` removes target combinations with insufficient model coverage, while `impute_missing_scores()` fills in missing scores using configurable strategies (worst, mean, NA, or reference model). Both use a strategy function pattern for extensibility. See `vignette("handling-missing-forecasts")` for details (#1122).
- Added internal S3 generic `get_forecast_type_ids()` so each forecast type declares the columns (beyond the forecast unit) that identify a unique row. `get_duplicate_forecasts()` now uses this instead of hard-coded column names (#888).
- Removed the deprecated vignettes `Deprecated-functions` and `Deprecated-visualisations`. The code for removed functions (`plot_predictions()`, `make_NA()`, `plot_ranges()`, `plot_score_table()`, `merge_pred_and_obs()`) can still be found in the [git history](https://github.com/epiforecasts/scoringutils/tree/d0cd8e2/vignettes) (#1158).

Expand Down
228 changes: 228 additions & 0 deletions R/filter-scores.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
#' @title Filter scores
#'
#' @description
#' Filter a `scores` object using a supplied strategy function.
#' `filter_scores()` is responsible for preserving the `scores`
#' class and the `metrics` attribute; the strategy is
#' responsible only for the filtering logic.
#'
#' Strategies are constructed by helpers such as
#' [filter_to_intersection()] and [filter_to_include()] and can
#' also be user-defined. A strategy is a function with
#' signature `function(scores, compare)` that returns a
#' filtered data.table with the same columns as its input.
#'
#' @param scores An object of class `scores` (a data.table with
#' an additional `metrics` attribute as produced by [score()]).
#' @param strategy A strategy function. See Description for the
#' expected signature. Default: [filter_to_intersection()].
#' @param compare Character string (default `"model"`) naming the
#' column whose values are compared when deciding which
#' target combinations to keep.
#'
#' @return A `scores` object with the same class and `metrics`
#' attribute as the input, with rows filtered according to
#' `strategy`.
#'
#' @seealso [filter_to_intersection()], [filter_to_include()],
#' \code{vignette("handling-missing-forecasts")}
#' @importFrom cli cli_inform
#' @importFrom checkmate assert_class assert_character
#' assert_subset
#' @export
#' @keywords postprocess-scores
#' @examples
#' \dontshow{
#' data.table::setDTthreads(2)
#' }
#' scores <- example_quantile |>
#' as_forecast_quantile() |>
#' score()
#'
#' # Keep only targets covered by every model (the default)
#' filter_scores(scores)
#'
#' # Keep targets covered by at least 75% of models
#' filter_scores(
#' scores,
#' strategy = filter_to_intersection(min_coverage = 0.75)
#' )
#'
#' # Keep only targets covered by a named model
#' filter_scores(
#' scores,
#' strategy = filter_to_include("EuroCOVIDhub-baseline")
#' )
filter_scores <- function(
  scores,
  strategy = filter_to_intersection(),
  compare = "model"
) {
  assert_class(scores, "scores")
  assert_character(compare, len = 1)
  assert_subset(compare, names(scores))
  # assert_strategy() is defined elsewhere in the package; presumably it
  # checks that `strategy` is a function accepting a `compare` argument.
  assert_strategy(strategy, required = "compare")

  # Captured before filtering so it can be re-attached to the result below.
  original_metrics <- attr(scores, "metrics")

  result <- strategy(scores, compare = compare)

  # Strategies can be user-defined, so check the return value here instead of
  # failing with an obscure error in the row arithmetic below.
  if (!is.data.frame(result)) {
    cli_abort(c(
      "!" = "{.arg strategy} must return a data.frame.",
      i = "It returned an object of class {.cls {class(result)}}."
    ))
  }

  n_before <- nrow(scores)
  n_after <- nrow(result)
  n_dropped <- n_before - n_after

  # Nothing was removed: hand back the input itself so it is returned
  # exactly as it was passed in.
  if (n_dropped == 0) {
    cli_inform(c(
      i = "No rows filtered. Returning scores unchanged."
    ))
    return(scores)
  }

  # `{?s}` pluralises on the preceding quantity, avoiding "1 rows".
  cli_inform(c(
    i = "Filtered out {n_dropped} row{?s}.",
    i = "{n_after} of {n_before} rows remaining." # nolint: duplicate_argument_linter
  ))

  # Re-wrap the filtered rows as a `scores` object with the original metrics.
  return(new_scores(result, original_metrics))
}


#' @title Filter to target combinations meeting a coverage threshold
#'
#' @description
#' Strategy for [filter_scores()] that keeps target combinations
#' covered by at least `min_coverage` of the values in the
#' `compare` column. With the default `min_coverage = 1`, only
#' target combinations present for every compare value are kept
#' (strict intersection across the full set).
#'
#' To restrict to the targets covered by a named subset of
#' compare values instead of by a proportion, use
#' [filter_to_include()].
#'
#' @param min_coverage Numeric between 0 and 1 (default `1`).
#' Minimum proportion of compare values that must cover a
#' target combination for it to be kept.
#'
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looking through the code I think min_coverage and include are mutually exclusive - should this be two different functions?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More generally the documentation is quite dense (I didn't understand it before looking at the code) and really could do with some examples.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Split: filter_to_intersection(min_coverage) handles the coverage threshold, filter_to_include(include) handles the named intersection. Each now has single-purpose docs and its own examples. 0d3cf4b.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docs rewritten and examples added on both strategy factories. 0d3cf4b.

#' @return A strategy function for [filter_scores()]. Intended
#' to be passed to `filter_scores()` rather than called
#' directly — `filter_scores()` is where the `scores` class
#' and `metrics` attribute are preserved.
#'
#' @seealso [filter_scores()], [filter_to_include()]
#' @importFrom data.table as.data.table setkeyv uniqueN
#' @importFrom checkmate assert_number
#' @export
#' @keywords postprocess-scores
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

examples?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added @examples on both filter_to_intersection() and the new filter_to_include(). 0d3cf4b.

#' @examples
#' \dontshow{
#' data.table::setDTthreads(2)
#' }
#' scores <- example_quantile |>
#' as_forecast_quantile() |>
#' score()
#' filter_scores(
#' scores,
#' strategy = filter_to_intersection(min_coverage = 0.75)
#' )
filter_to_intersection <- function(min_coverage = 1) {
  assert_number(min_coverage, lower = 0, upper = 1)

  # The returned closure is the strategy; it captures `min_coverage` from the
  # factory call above.
  function(scores, compare = "model") {
    scores <- data.table::as.data.table(scores)
    forecast_unit <- get_forecast_unit(scores)
    # Target columns: the forecast unit without the column being compared.
    target_cols <- setdiff(forecast_unit, compare)

    # If the forecast unit consists of the compare column only, there is
    # nothing to group or join on (setkeyv() and the keyed join below need at
    # least one column). All rows then belong to one implicit target that
    # every compare value covers, so nothing is filtered.
    if (length(target_cols) == 0) {
      return(scores)
    }

    n_total <- data.table::uniqueN(scores[[compare]])

    # Pick the compare column via `.SDcols` rather than `get(compare)`:
    # inside `j`, a data column that happens to be named "compare" would
    # shadow the argument and `get()` would look up the wrong name.
    target_coverage <- scores[,
      .(n_compare = data.table::uniqueN(.SD[[1L]])),
      by = target_cols,
      .SDcols = compare
    ]

    # Keep targets whose share of distinct compare values meets the threshold.
    keep <- target_coverage$n_compare / n_total >= min_coverage
    qualifying <- target_coverage[keep, target_cols, with = FALSE]

    # Keyed inner join: only rows belonging to a qualifying target survive.
    data.table::setkeyv(scores, target_cols)
    data.table::setkeyv(qualifying, target_cols)
    scores[qualifying, nomatch = NULL]
  }
}


#' @title Filter to targets covered by named compare values
#'
#' @description
#' Strategy for [filter_scores()] that restricts the kept
#' target combinations to those covered by every value listed
#' in `include`. With a single value this keeps only that
#' value's targets; with several values, the intersection of
#' their target sets is kept.
#'
#' The returned strategy aborts with an error if any value in
#' `include` does not occur in the `compare` column of the
#' scores it is applied to.
#'
#' To use a proportion-based threshold over all compare values
#' instead, use [filter_to_intersection()].
#'
#' @param include Character vector of length one or more, with
#'   no missing values. Values from the `compare` column whose
#'   target sets should be intersected. Duplicated values are
#'   ignored.
#'
#' @return A strategy function for [filter_scores()]. Intended
#'   to be passed to `filter_scores()` rather than called
#'   directly; `filter_scores()` is where the `scores` class
#'   and `metrics` attribute are preserved.
#'
#' @seealso [filter_scores()], [filter_to_intersection()]
#' @importFrom data.table as.data.table setkeyv
#' @importFrom checkmate assert_character
#' @importFrom cli cli_abort
#' @export
#' @keywords postprocess-scores
#' @examples
#' \dontshow{
#'   data.table::setDTthreads(2)
#' }
#' scores <- example_quantile |>
#'   as_forecast_quantile() |>
#'   score()
#' filter_scores(
#'   scores,
#'   strategy = filter_to_include("EuroCOVIDhub-baseline")
#' )
filter_to_include <- function(include) {
  # Reject NA up front: if the compare column itself contains NA, an NA in
  # `include` would pass the "unknown value" check below and then match no
  # rows in the `==` comparison, silently filtering out every row.
  assert_character(include, min.len = 1, any.missing = FALSE)
  # Intersecting a target set with itself is a no-op, so drop duplicates.
  include <- unique(include)

  function(scores, compare = "model") {
    scores <- data.table::as.data.table(scores)
    forecast_unit <- get_forecast_unit(scores)
    # Target columns: the forecast unit without the column being compared.
    target_cols <- setdiff(forecast_unit, compare)

    unknown <- setdiff(include, unique(scores[[compare]]))
    if (length(unknown) > 0) {
      cli_abort(c(
        "!" = paste0(
          "{.val {unknown}} not found in ",
          "{.arg {compare}} column."
        )
      ))
    }

    # If the forecast unit consists of the compare column only, there is
    # nothing to merge or join on. All rows then belong to one implicit
    # target that every included value covers, so nothing is filtered.
    if (length(target_cols) == 0) {
      return(scores)
    }

    # One table of distinct targets per included compare value.
    target_sets <- lapply(include, function(v) {
      unique(
        scores[
          scores[[compare]] == v,
          target_cols,
          with = FALSE
        ]
      )
    })

    # Successive inner merges leave the targets common to all included values.
    qualifying <- Reduce(
      function(a, b) merge(a, b, by = target_cols),
      target_sets
    )

    # Keyed inner join: only rows belonging to a qualifying target survive.
    data.table::setkeyv(scores, target_cols)
    data.table::setkeyv(qualifying, target_cols)
    scores[qualifying, nomatch = NULL]
  }
}
1 change: 1 addition & 0 deletions R/get-protected-columns.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ get_protected_columns <- function(data = NULL) {

protected_columns <- c(
".mv_group_id",
".imputed",
"predicted", "observed", "sample_id", "quantile_level", "upper", "lower",
"pit_value", "interval_range", "boundary", "predicted_label",
"interval_coverage", "interval_coverage_deviation",
Expand Down
Loading
Loading