Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 17 additions & 91 deletions R/read_azure_files.R
Original file line number Diff line number Diff line change
@@ -1,35 +1,16 @@
#' Read a parquet file from Azure storage
#'
#' @param container An Azure container object, as returned by [get_container]
#' @param file The name of the file to be read, as a string. NB The file
#' extension does not need to be included (though it can be). The function
#' will error if multiple files are somehow matched.
#' @param path The path to the directory where `file` is located, as a string.
#' Only needed if `file` does not already contain its full path. If file is
#' just a file name with no path, then provide the path to the directory here.
#' This must be the full path to the file location, as the function will not
#' search into subdirectories recursively. Set to `"/"` (the root of the
#' container) by default.
#' @param info Boolean. Whether to print user feedback about the file that is
#' being read. Useful for checking the function is doing what is expected, but
#' can be turned off with `FALSE`. Can be set persistently with the option
#' "azkit.info". If `NULL` then it will default to the value of
#' [rlang::is_interactive] (that is, `TRUE` for interactive sessions).
#' @param file The full path to the file to be read, as a string.
#' @param ... optional arguments to be passed through to [arrow::read_parquet]
#' @returns A tibble
#' @examples \dontrun{
#' # provide the full path to the file, including the '.parquet' extension
#' read_azure_parquet(cont, "data/folder/path/1.parquet")
#' read_azure_parquet(cont, "storage/parquet/2025/06/29/case_details.parquet")
#' }
#' @export
read_azure_parquet <- function(container, file, ...) {
  # Download the blob into memory: `dest = NULL` tells download_blob to
  # return the contents as a raw vector instead of writing them to disk,
  # so the parquet data can be parsed without touching the filesystem
  blob_raw <- AzureStor::download_blob(container, src = file, dest = NULL)
  arrow::read_parquet(blob_raw, ...)
}

Expand All @@ -41,10 +22,9 @@ read_azure_parquet <- function(container, file, path = "/", info = NULL, ...) {
#' [yyjsonr::read_json_raw]
#' @returns A list
#' @export
read_azure_json <- function(container, file, ...) {
  # Download the blob into memory: `dest = NULL` tells download_blob to
  # return the contents as a raw vector instead of writing them to disk,
  # which yyjsonr can then parse directly
  blob_raw <- AzureStor::download_blob(container, src = file, dest = NULL)
  yyjsonr::read_json_raw(blob_raw, ...)
}

Expand All @@ -56,13 +36,12 @@ read_azure_json <- function(container, file, path = "/", info = NULL, ...) {
#' [yyjsonr::read_json_file]
#' @returns A list
#' @export
read_azure_jsongz <- function(container, file, path = "/", info = NULL, ...) {
full_path <- check_blob_exists(container, file, "json.gz", info, path)
read_azure_jsongz <- function(container, file, ...) {
  # Gzipped JSON goes via a temp file on disk, as yyjsonr reads compressed
  # input from a file path rather than from a raw vector. The tempfile is
  # cleaned up automatically when the calling frame exits (withr).
  # NB `fileext` is appended verbatim by tempfile(), so it must include the
  # leading dot — without it the file would be named e.g. "foojson.gz"
  dl <- withr::local_tempfile(
    pattern = tools::file_path_sans_ext(basename(file), compression = TRUE),
    fileext = ".json.gz"
  )
  AzureStor::download_blob(container, src = file, dest = dl)
  yyjsonr::read_json_file(dl, ...)
}

Expand All @@ -78,12 +57,11 @@ read_azure_jsongz <- function(container, file, path = "/", info = NULL, ...) {
# If nothing is provided here, the compression type will be set to "none".
#' @returns The data object that was stored in the rds file
#' @export
read_azure_rds <- function(container, file, ...) {
  # Default the (de)compression `type` to "none" — the standard on SU
  # Azure — unless the caller supplied their own via `...`:
  # `.homonyms = "first"` keeps the caller's value when both are present
  args <- rlang::dots_list(..., type = "none", .homonyms = "first")
  rlang::exec(AzureStor::storage_load_rds, container, file, !!!args)
}


Expand All @@ -93,71 +71,19 @@ read_azure_rds <- function(container, file, path = "/", info = NULL, ...) {
#' @param ... optional arguments to be passed through to [readr::read_delim]
#' @returns A tibble
#' @export
read_azure_csv <- function(container, file, ...) {
  # Thin wrapper: AzureStor performs the download and the CSV parsing in
  # a single step, forwarding `...` on to the underlying reader
  AzureStor::storage_read_csv(container, file = file, ...)
}


#' Read any file from Azure storage
#'
#' @inheritParams read_azure_parquet
#' @param ext If a custom extension needs to be supplied, you can specify it
#' here. If `NULL`, the default, the extension of `file` will be used
#' @param ... optional arguments to be passed through to
#' [AzureStor::download_blob]
#' @returns A raw data stream
#' @export
read_azure_file <- function(
container,
file,
path = "/",
info = NULL,
ext = NULL,
...
) {
ext <- ext %||% tools::file_ext(file)
check_blob_exists(container, file, ext, info, path) |>
# using `dest = NULL` means pass the data through as a raw vector
AzureStor::download_blob(container, src = _, dest = NULL, ...)
}


#' Ensures that the filepath for the file to read exists
#'
#' @inheritParams read_azure_parquet
#' @param ext The standard file extension for the file type, e.g. "json"
#' @keywords internal
check_blob_exists <- function(container, file, ext, info, path) {
stopifnot("no container found" = inherits(container, "blob_container"))
path <- if (path %in% c("", "/")) "" else path
stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
dir_name <- if (dirname(file) == ".") "" else dirname(file)
# Potentially the user could provide a partial file path in `path` and a
# further sub-directory as part of `file`. This handles that eventuality,
# though this usage pattern should be quite rare!
dpath <- file.path(path, dir_name)
fname <- basename(file)
if (nzchar(ext) && !gregg(fname, "\\.{ext}$")) {
fname <- glue::glue("{fname}.{ext}")
}
# remove duplicate slashes and any initial slashes
file_path <- sub("^/", "", gsub("/+", "/", file.path(dpath, fname)))

filepath_out <- AzureStor::list_blobs(container, dpath, recursive = FALSE) |>
dplyr::filter(dplyr::if_any("name", \(x) x == {{ file_path }})) |>
dplyr::pull("name")

msg1 <- ct_error_msg("no matching {ext} file found")
msg2 <- cst_error_msg("multiple matching {ext} files found")
check_that(filepath_out, \(x) length(x) > 0, msg1) # check length > 0
check_scalar_type(filepath_out, "character", msg2) # check length == 1

info_option <- getOption("azkit.info")
stopifnot(rlang::is_scalar_logical(info) || is.null(info))
stopifnot(rlang::is_scalar_logical(info_option) || is.null(info_option))
if (info %||% info_option %||% rlang::is_interactive()) {
cli::cli_alert_info("File {.val {filepath_out}} will be read in")
}
filepath_out
read_azure_file <- function(container, file, ...) {
  # Returning the raw vector (via `dest = NULL`) lets callers decide how
  # to parse or persist the downloaded bytes themselves
  raw_out <- AzureStor::download_blob(container, src = file, dest = NULL, ...)
  raw_out
}
34 changes: 0 additions & 34 deletions man/check_blob_exists.Rd

This file was deleted.

19 changes: 2 additions & 17 deletions man/read_azure_csv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 2 additions & 20 deletions man/read_azure_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 2 additions & 17 deletions man/read_azure_json.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 2 additions & 17 deletions man/read_azure_jsongz.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 2 additions & 22 deletions man/read_azure_parquet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 2 additions & 17 deletions man/read_azure_rds.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading