epiforecasts · seabbs-bot · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
diff --git a/NAMESPACE b/NAMESPACE
@@ -78,6 +78,9 @@ export(dispersion_quantile)
 export(dispersion_sample)
 export(dss_sample)
 export(energy_score_multivariate)
+export(filter_scores)
+export(filter_to_include)
+export(filter_to_intersection)
 export(get_correlations)
 export(get_coverage)
 export(get_duplicate_forecasts)
@@ -88,6 +91,11 @@ export(get_grouping)
 export(get_metrics)
 export(get_pairwise_comparisons)
 export(get_pit_histogram)
+export(impute_mean_score)
+export(impute_missing_scores)
+export(impute_model_score)
+export(impute_na_score)
+export(impute_worst_score)
 export(interval_coverage)
 export(is_forecast)
 export(is_forecast_binary)
@@ -178,12 +186,14 @@ importFrom(data.table,key)
 importFrom(data.table,melt)
 importFrom(data.table,nafill)
 importFrom(data.table,rbindlist)
+importFrom(data.table,set)
 importFrom(data.table,setDT)
 importFrom(data.table,setattr)
 importFrom(data.table,setcolorder)
 importFrom(data.table,setkeyv)
 importFrom(data.table,setnames)
 importFrom(data.table,setorderv)
+importFrom(data.table,uniqueN)
 importFrom(ggplot2,.data)
 importFrom(ggplot2,`%+replace%`)
 importFrom(ggplot2,aes)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,6 @@
 # scoringutils (development version)
 
+- Added `filter_scores()` and `impute_missing_scores()` for handling missing forecasts before summarisation. `filter_scores()` removes target combinations with insufficient model coverage, while `impute_missing_scores()` fills in missing scores using configurable strategies (worst, mean, NA, or reference model). Both use a strategy function pattern for extensibility. See `vignette("handling-missing-forecasts")` for details (#1122).
 - Added internal S3 generic `get_forecast_type_ids()` so each forecast type declares the columns (beyond the forecast unit) that identify a unique row. `get_duplicate_forecasts()` now uses this instead of hard-coded column names (#888).
 - Removed the deprecated vignettes `Deprecated-functions` and `Deprecated-visualisations`. The code for removed functions (`plot_predictions()`, `make_NA()`, `plot_ranges()`, `plot_score_table()`, `merge_pred_and_obs()`) can still be found in the [git history](https://github.com/epiforecasts/scoringutils/tree/d0cd8e2/vignettes) (#1158).
 

diff --git a/R/filter-scores.R b/R/filter-scores.R
@@ -0,0 +1,228 @@
+#' @title Filter scores
+#'
+#' @description
+#' Filter a `scores` object using a supplied strategy function.
+#' `filter_scores()` is responsible for preserving the `scores`
+#' class and the `metrics` attribute; the strategy is
+#' responsible only for the filtering logic.
+#'
+#' Strategies are constructed by helpers such as
+#' [filter_to_intersection()] and [filter_to_include()] and can
+#' also be user-defined. A strategy is a function with
+#' signature `function(scores, compare)` that returns a
+#' filtered data.table with the same columns as its input.
+#'
+#' @param scores An object of class `scores` (a data.table with
+#'   an additional `metrics` attribute as produced by [score()]).
+#' @param strategy A strategy function. See Description for the
+#'   expected signature. Default: [filter_to_intersection()].
+#' @param compare Character string (default `"model"`) naming the
+#'   column whose values are compared when deciding which
+#'   target combinations to keep.
+#'
+#' @return A `scores` object with the same class and `metrics`
+#'   attribute as the input, with rows filtered according to
+#'   `strategy`.
+#'
+#' @seealso [filter_to_intersection()], [filter_to_include()],
+#'   \code{vignette("handling-missing-forecasts")}
+#' @importFrom cli cli_inform
+#' @importFrom checkmate assert_class assert_character
+#'   assert_subset
+#' @export
+#' @keywords postprocess-scores
+#' @examples
+#' \dontshow{
+#'   data.table::setDTthreads(2)
+#' }
+#' scores <- example_quantile |>
+#'   as_forecast_quantile() |>
+#'   score()
+#'
+#' # Keep only targets covered by every model (the default)
+#' filter_scores(scores)
+#'
+#' # Keep targets covered by at least 75% of models
+#' filter_scores(
+#'   scores,
+#'   strategy = filter_to_intersection(min_coverage = 0.75)
+#' )
+#'
+#' # Keep only targets covered by a named model
+#' filter_scores(
+#'   scores,
+#'   strategy = filter_to_include("EuroCOVIDhub-baseline")
+#' )
+filter_scores <- function(
+  scores,
+  strategy = filter_to_intersection(),
+  compare = "model"
+) {
+  # Validate all inputs before handing control to the strategy.
+  assert_class(scores, "scores")
+  assert_character(compare, len = 1)
+  assert_subset(compare, names(scores))
+  assert_strategy(strategy, required = "compare")
+
+  # Read the metrics before filtering so they can be handed to
+  # new_scores() together with the strategy's output.
+  metrics <- attr(scores, "metrics")
+  filtered <- strategy(scores, compare = compare)
+
+  # NOTE: these three names are interpolated by the cli messages below,
+  # so they must keep exactly these names.
+  n_before <- nrow(scores)
+  n_after <- nrow(filtered)
+  n_dropped <- n_before - n_after
+
+  if (n_dropped != 0) {
+    cli_inform(c(
+      i = "Filtered out {n_dropped} rows.",
+      i = "{n_after} of {n_before} rows remaining." # nolint: duplicate_argument_linter
+    ))
+    return(new_scores(filtered, metrics))
+  }
+
+  # Row count is unchanged: hand back the original input object rather
+  # than the strategy's output.
+  cli_inform(c(
+    i = "No rows filtered. Returning scores unchanged."
+  ))
+  return(scores)
+}
+
+
+#' @title Filter to target combinations meeting a coverage threshold
+#'
+#' @description
+#' Strategy for [filter_scores()] that keeps target combinations
+#' covered by at least `min_coverage` of the values in the
+#' `compare` column. With the default `min_coverage = 1`, only
+#' target combinations present for every compare value are kept
+#' (strict intersection across the full set).
+#'
+#' To restrict to the targets covered by a named subset of
+#' compare values instead of by a proportion, use
+#' [filter_to_include()].
+#'
+#' @param min_coverage Numeric between 0 and 1 (default `1`).
+#'   Minimum proportion of compare values that must cover a
+#'   target combination for it to be kept.
+#'
+#' @return A strategy function for [filter_scores()]. Intended
+#'   to be passed to `filter_scores()` rather than called
+#'   directly — `filter_scores()` is where the `scores` class
+#'   and `metrics` attribute are preserved.
+#'
+#' @seealso [filter_scores()], [filter_to_include()]
+#' @importFrom data.table as.data.table setkeyv uniqueN
+#' @importFrom checkmate assert_number
+#' @export
+#' @keywords postprocess-scores
+#' @examples
+#' \dontshow{
+#'   data.table::setDTthreads(2)
+#' }
+#' scores <- example_quantile |>
+#'   as_forecast_quantile() |>
+#'   score()
+#' filter_scores(
+#'   scores,
+#'   strategy = filter_to_intersection(min_coverage = 0.75)
+#' )
+filter_to_intersection <- function(min_coverage = 1) {
+  assert_number(min_coverage, lower = 0, upper = 1)
+
+  # The returned closure is the strategy: it receives the scores and the
+  # name of the compare column and returns the filtered data.table.
+  function(scores, compare = "model") {
+    scores <- data.table::as.data.table(scores)
+    forecast_unit <- get_forecast_unit(scores)
+    # Everything in the forecast unit except the compare column
+    # identifies a target combination.
+    target_cols <- setdiff(forecast_unit, compare)
+
+    # Without any target columns there is a single implicit target that
+    # every compare value covers, so nothing can be filtered out.
+    # Returning here also avoids keying and joining on zero columns.
+    if (length(target_cols) == 0) {
+      return(scores)
+    }
+
+    n_total <- data.table::uniqueN(scores[[compare]])
+
+    # Number of distinct compare values per target combination. The
+    # compare column is supplied through `.SDcols` instead of
+    # `get(compare)`: inside `j`, a column that happens to be named
+    # "compare" would shadow the function argument.
+    target_coverage <- scores[,
+      .(n_compare = data.table::uniqueN(.SD[[1L]])),
+      by = target_cols,
+      .SDcols = compare
+    ]
+
+    keep <- target_coverage$n_compare / n_total >= min_coverage
+    qualifying <- target_coverage[keep, target_cols, with = FALSE]
+
+    # Keyed inner join: keep only rows whose target combination is
+    # listed in `qualifying`.
+    data.table::setkeyv(scores, target_cols)
+    data.table::setkeyv(qualifying, target_cols)
+    scores[qualifying, nomatch = NULL]
+  }
+}
+
+
+#' @title Filter to targets covered by named compare values
+#'
+#' @description
+#' Strategy for [filter_scores()] that restricts the kept
+#' target combinations to those covered by every value listed
+#' in `include`. With a single value this keeps only that
+#' value's targets; with several values, the intersection of
+#' their target sets is kept.
+#'
+#' To use a proportion-based threshold over all compare values
+#' instead, use [filter_to_intersection()].
+#'
+#' @param include Character vector of length one or more. Values
+#'   from the `compare` column whose target sets should be
+#'   intersected.
+#'
+#' @return A strategy function for [filter_scores()]. Intended
+#'   to be passed to `filter_scores()` rather than called
+#'   directly — `filter_scores()` is where the `scores` class
+#'   and `metrics` attribute are preserved.
+#'
+#' @seealso [filter_scores()], [filter_to_intersection()]
+#' @importFrom data.table as.data.table setkeyv
+#' @importFrom checkmate assert_character
+#' @importFrom cli cli_abort
+#' @export
+#' @keywords postprocess-scores
+#' @examples
+#' \dontshow{
+#'   data.table::setDTthreads(2)
+#' }
+#' scores <- example_quantile |>
+#'   as_forecast_quantile() |>
+#'   score()
+#' filter_scores(
+#'   scores,
+#'   strategy = filter_to_include("EuroCOVIDhub-baseline")
+#' )
+filter_to_include <- function(include) {
+  assert_character(include, min.len = 1)
+
+  # The returned closure is the strategy: it receives the scores and the
+  # name of the compare column and returns the filtered data.table.
+  function(scores, compare = "model") {
+    scores <- data.table::as.data.table(scores)
+    forecast_unit <- get_forecast_unit(scores)
+    # Everything in the forecast unit except the compare column
+    # identifies a target combination.
+    target_cols <- setdiff(forecast_unit, compare)
+
+    # Extract the compare column once, outside of `[`, so that no score
+    # column name can shadow the variables used to build the row mask.
+    compare_values <- scores[[compare]]
+
+    unknown <- setdiff(include, unique(compare_values))
+    if (length(unknown) > 0) {
+      cli_abort(c(
+        "!" = paste0(
+          "{.val {unknown}} not found in ",
+          "{.arg {compare}} column."
+        )
+      ))
+    }
+
+    # Without any target columns there is a single implicit target that
+    # every (validated) include value covers, so nothing can be filtered
+    # out. Returning here also avoids merging and keying on zero columns.
+    if (length(target_cols) == 0) {
+      return(scores)
+    }
+
+    # One table of distinct target combinations per requested value.
+    # Duplicated entries in `include` would only trigger redundant
+    # self-merges, so they are dropped first.
+    target_sets <- lapply(unique(include), function(v) {
+      # which() drops NA comparisons, matching how a logical `i` with
+      # NA entries selects no row for them.
+      rows <- which(compare_values == v)
+      unique(scores[rows, target_cols, with = FALSE])
+    })
+
+    # Successive inner merges leave only the targets present in every set.
+    qualifying <- Reduce(
+      function(a, b) merge(a, b, by = target_cols),
+      target_sets
+    )
+
+    # Keyed inner join: keep only rows whose target combination is
+    # listed in `qualifying`.
+    data.table::setkeyv(scores, target_cols)
+    data.table::setkeyv(qualifying, target_cols)
+    scores[qualifying, nomatch = NULL]
+  }
+}
diff --git a/R/get-protected-columns.R b/R/get-protected-columns.R
@@ -15,6 +15,7 @@ get_protected_columns <- function(data = NULL) {
 
   protected_columns <- c(
     ".mv_group_id",
+    ".imputed",
     "predicted", "observed", "sample_id", "quantile_level", "upper", "lower",
     "pit_value", "interval_range", "boundary", "predicted_label",
     "interval_coverage", "interval_coverage_deviation",