Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
^_pkgdown\.yml$
^docs$
^pkgdown$
^\.claude$
^claude\.md$
8 changes: 5 additions & 3 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ env:

jobs:
document:
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest

steps:
Expand Down Expand Up @@ -47,10 +48,11 @@ jobs:
run: |
git config --local user.email "actions@github.com"
git config --local user.name "GitHub Actions"
git pull
git fetch origin ${{ github.ref_name }}
git checkout ${{ github.ref_name }}
git add -f man/\* NAMESPACE
git commit -m 'Documentation' || echo "No changes to commit"
git push origin || echo "No changes to commit"
git commit -m 'Documentation [automated]' || echo "No changes to commit"
git push origin ${{ github.ref_name }} || echo "No changes to commit"

R-CMD-check:
if: ${{ always() }}
Expand Down
7 changes: 3 additions & 4 deletions .github/workflows/document-and-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,13 @@ jobs:
"

- name: commit
if: github.event_name != 'pull_request'
run: |
git config --local user.email "actions@github.com"
git config --local user.name "GitHub Actions"
git add -f man/\* NAMESPACE
git commit -m 'Documentation' || echo "No changes to commit"
git push origin || echo "No changes to commit"
git commit -m 'Documentation [automated]' || echo "No changes to commit"
git push origin HEAD:${{ github.ref_name }} || echo "No changes to commit"

- name: Create pkgdown
env:
Expand All @@ -72,7 +73,6 @@ jobs:
R -e "
remotes::install_github('${{ github.repository }}', ref = '${{ github.ref_name }}', force = TRUE);
rsconnect::setAccountInfo(name='forrt-replications', token=${{secrets.SHINYAPPS_TOKEN}}, secret=${{secrets.SHINYAPPS_SECRET}});
rsconnect::deployApp(appName = 'fred_annotator', appDir = './inst/fred_annotator', forceUpdate = TRUE);
rsconnect::deployApp(appName = 'fred_explorer', appDir = './inst/fred_explorer', forceUpdate = TRUE);
"

Expand All @@ -81,5 +81,4 @@ jobs:
run: |
R -e "
rsconnect::deployApp(appName = 'fred_explorer_release', appDir = './inst/fred_explorer', forceUpdate = TRUE);
rsconnect::deployApp(appName = 'fred_annotator_release', appDir = './inst/fred_annotator', forceUpdate = TRUE);
"
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: FReD
Title: Interfaces to the FORRT Replication Database
Version: 0.0.0.9002
Version: 0.2.0
Authors@R: c(
person("Lukas", "Röseler", , "lukas.roeseler@uni-muenster.de", role = "aut",
comment = c(ORCID = "0000-0002-6446-1901")),
Expand Down
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2024
COPYRIGHT HOLDER: FReD authors
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
This version history contains noteworthy changes. For a full history of changes, see the [commit history](https://github.com/forrtproject/FReD/commits/main/)

# FReD 0.2.0

## Breaking Changes
- **New data source URL**: The package now uses a new data source with updated variable naming conventions
- **Variable naming convention**: All variable names now use `_o` suffix for original study variables and `_r` suffix for replication study variables (e.g., `es_original` → `es_o`, `n_replication` → `n_r`, `ref_original` → `ref_o`, `doi_replication` → `doi_r`)
- **`run_annotator()` deprecated**: The local annotator app has been removed. `run_annotator()` now opens the web version at forrt.org instead

## Notes
- If you have code that references old variable names, you will need to update it to use the new `_o`/`_r` suffixes

# FReD 0.1.0

## New features
Expand Down
129 changes: 72 additions & 57 deletions R/data_management.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,37 +94,26 @@ read_fred <- function(data = get_param("FRED_DATA_FILE"), retain_es_as_character

tryCatch({

red <- safe_read_xl(data, url = get_param("FRED_DATA_URL"), sheet = "Data") # .xlsx file
red <- red[-(1:2), ] # exclude labels and "X" column

forrt <- safe_read_xl(data, url = get_param("FRED_DATA_URL"), sheet = "FORRT R&R (editable)", startRow = 1)
forrt <- forrt[-(1:2), ] # exclude labels and "X" column
forrt <- forrt[!(forrt$doi_original %in% red$doi_original), ] # exclude forrt entries of original study that already appear in FReD (based on DOIs)

# additional studies
as <- safe_read_xl(data, url = get_param("FRED_DATA_URL"), sheet = "Additional Studies to be added", startRow = 2)
as$id <- paste("uncoded_studies_", rownames(as), sep = "")
as <- as[as$`Study.listed.in.ReD?` != "1.0", ] # exclude additional studies that are already listed in the main dataset
as <- as[!is.na(as$doi_original), ] # exclude studies for which doi_original is unavailable because they will not be findable in the annotator anyway

numeric_variables <- c("n_original", "n_replication", "es_orig_value", "es_rep_value",
"validated", "published_rep", "same_design", "same_test",
"original_authors",
"significant_original", "significant_replication", "power")


if (!retain_es_as_character) {
numeric_variables <- c(numeric_variables, "es_orig_RRR", "es_rep_RRR")
# New data format: single sheet with all data
fred_data <- safe_read_xl(data, url = get_param("FRED_DATA_URL"), sheet = 1)

# Add id column if not present (use fred_id or row number)
if (!"id" %in% names(fred_data)) {
if ("fred_id" %in% names(fred_data)) {
fred_data$id <- fred_data$fred_id
} else {
fred_data$id <- seq_len(nrow(fred_data))
}
}

numeric_variables <- c("n_o", "n_r", "es_value_o", "es_value_r", "pval_value_o", "pval_value_r")

# Only coerce variables that exist in the dataset
numeric_variables <- intersect(numeric_variables, names(fred_data))

# Assuming 'red' and 'forrt' have a unique ID column named "id"
red <- coerce_to_numeric(red, numeric_variables, id_var = "id", verbose = verbose)
forrt <- coerce_to_numeric(forrt, numeric_variables, id_var = "id", verbose = verbose)
fred_data <- coerce_to_numeric(fred_data, numeric_variables, id_var = "id", verbose = verbose)

# merge the data, aligning column types where one is character (as empty colums are imported as numeric)
return(bind_rows_with_characters(red, forrt, as))
return(fred_data)

}, error = function(e) {
return(return_inbuilt("data"))
Expand All @@ -149,6 +138,7 @@ coerce_to_numeric <- function(df, numeric_vars, id_var, verbose = TRUE) {
problematic_entries <- list()

for (var in numeric_vars) {
# Suppress "NAs introduced by coercion" warnings - we detect and report these ourselves below
problematic_rows <- which(!is.na(df[[var]]) & is.na(suppressWarnings(as.numeric(df[[var]]))))

if (length(problematic_rows) > 0) {
Expand Down Expand Up @@ -187,50 +177,75 @@ coerce_to_numeric <- function(df, numeric_vars, id_var, verbose = TRUE) {

clean_variables <- function(fred_data) {

# Initialize columns that may not exist in new data format
if (!"description" %in% names(fred_data)) fred_data$description <- ""
if (!"tags" %in% names(fred_data)) fred_data$tags <- NA
if (!"contributors" %in% names(fred_data)) fred_data$contributors <- NA
if (!"result" %in% names(fred_data)) fred_data$result <- NA
if (!"notes" %in% names(fred_data)) fred_data$notes <- NA
if (!"exclusion" %in% names(fred_data)) fred_data$exclusion <- NA
if (!"validated" %in% names(fred_data)) fred_data$validated <- 1
if (!"osf_link" %in% names(fred_data)) {
fred_data$osf_link <- ifelse(!is.na(fred_data$url_r), fred_data$url_r, NA)
}
if (!"source" %in% names(fred_data)) fred_data$source <- NA
if (!"orig_journal" %in% names(fred_data)) {
fred_data$orig_journal <- if ("journal_o" %in% names(fred_data)) fred_data$journal_o else NA
}

# recode variables for app to work
fred_data$pc_tags <- NA
fred_data$pc_contributors <- NA
fred_data$description <- ifelse(is.na(fred_data$description), "", fred_data$description)
fred_data$contributors <- ifelse(is.na(fred_data$contributors), fred_data$pc_contributors, fred_data$contributors)
fred_data$tags <- ifelse(is.na(fred_data$tags), fred_data$pc_tags, fred_data$tags)
fred_data$subjects <- NA
fred_data$description <- ifelse(is.na(fred_data$description), fred_data$pc_title, fred_data$description)

fred_data$closeness <- NA
fred_data$result <- ifelse(fred_data$result == "0", NA, fred_data$result)
fred_data$result <- ifelse(!is.na(fred_data$result) & fred_data$result == "0", NA, fred_data$result)

fred_data$result

# compute year the original study was published (match 1800-2099 only, and require consecutive numbers)
fred_data$orig_year <- as.numeric(gsub(".*((18|19|20)\\d{2}).*", "\\1", fred_data$ref_original))
# compute year the original study was published - use year_o if available, otherwise extract from ref_o
if ("year_o" %in% names(fred_data)) {
fred_data$orig_year <- as.numeric(fred_data$year_o)
} else {
fred_data$orig_year <- as.numeric(gsub(".*((18|19|20)\\d{2}).*", "\\1", fred_data$ref_o))
}

# # delete duplicates and non-replication studies
fred_data <- fred_data[fred_data$notes != "duplicate" | is.na(fred_data$notes), ] # ADDED: study exclusions due to duplicates
fred_data <- fred_data[fred_data$notes != "No actual replication conducted" | is.na(fred_data$notes), ] # ADDED: some registrations had no corresponding replication study
# delete duplicates and non-replication studies (only if notes column exists and has values)
if ("notes" %in% names(fred_data)) {
fred_data <- fred_data[is.na(fred_data$notes) | fred_data$notes != "duplicate", ]
fred_data <- fred_data[is.na(fred_data$notes) | fred_data$notes != "No actual replication conducted", ]
}

# remove entries with reasons for exclusions
fred_data <- fred_data[is.na(fred_data$exclusion), ]
# remove entries with reasons for exclusions (only if exclusion column exists)
if ("exclusion" %in% names(fred_data)) {
fred_data <- fred_data[is.na(fred_data$exclusion), ]
}

# Collapse validated categories (# 2: error detected and corrected)
fred_data$validated <- ifelse(fred_data$validated == 1 | fred_data$validated == 2, 1, fred_data$validated)

if ("validated" %in% names(fred_data)) {
fred_data$validated <- ifelse(fred_data$validated == 1 | fred_data$validated == 2, 1, fred_data$validated)
}

# Strip DOIs by removing everything before first 10.
fred_data$doi_original <- gsub("^.*?(10\\.\\d+/.*$)", "\\1", fred_data$doi_original) %>% str_trim_base()
fred_data$doi_replication <- gsub("^.*?(10\\.\\d+/.*$)", "\\1", fred_data$doi_replication) %>% str_trim_base()
if ("doi_o" %in% names(fred_data)) {
fred_data$doi_o <- gsub("^.*?(10\\.\\d+/.*$)", "\\1", fred_data$doi_o) %>% str_trim_base()
}
if ("doi_r" %in% names(fred_data)) {
fred_data$doi_r <- gsub("^.*?(10\\.\\d+/.*$)", "\\1", fred_data$doi_r) %>% str_trim_base()
}

# Remove DOIs from references
fred_data$ref_original <- fred_data$ref_original %>%
stringr::str_remove_all("https?://(dx\\.)?doi\\.org/10\\.[^ >,]+") %>%
stringr::str_remove_all("doi:10\\.[^ >,]+") %>%
stringr::str_remove_all("10\\.[^ >,]+") %>%
str_trim_base()

fred_data$ref_replication <- fred_data$ref_replication %>%
stringr::str_remove_all("https?://(dx\\.)?doi\\.org/10\\.[^ >,]+") %>%
stringr::str_remove_all("doi:10\\.[^ >,]+") %>%
stringr::str_remove_all("10\\.[^ >,]+") %>%
str_trim_base()
if ("ref_o" %in% names(fred_data)) {
fred_data$ref_o <- fred_data$ref_o %>%
stringr::str_remove_all("https?://(dx\\.)?doi\\.org/10\\.[^ >,]+") %>%
stringr::str_remove_all("doi:10\\.[^ >,]+") %>%
stringr::str_remove_all("10\\.[^ >,]+") %>%
str_trim_base()
}

if ("ref_r" %in% names(fred_data)) {
fred_data$ref_r <- fred_data$ref_r %>%
stringr::str_remove_all("https?://(dx\\.)?doi\\.org/10\\.[^ >,]+") %>%
stringr::str_remove_all("doi:10\\.[^ >,]+") %>%
stringr::str_remove_all("10\\.[^ >,]+") %>%
str_trim_base()
}

fred_data
}
Expand Down
50 changes: 25 additions & 25 deletions R/effect_size_transformations.R
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,8 @@ as_numeric_verbose <- function(x, quiet = FALSE) {
#' @param coalesce_values Logical. Should existing values in es_type_columns be retained?
#' @return FReD dataset with additional columns for common effect sizes

add_common_effect_sizes <- function(fred_data, es_value_columns = c("es_orig_value", "es_rep_value"),
es_type_columns = c("es_orig_estype", "es_rep_estype"), es_common_names = c("es_original", "es_replication"),
add_common_effect_sizes <- function(fred_data, es_value_columns = c("es_value_o", "es_value_r"),
es_type_columns = c("es_type_o", "es_type_r"), es_common_names = c("es_o", "es_r"),
coalesce_values = TRUE) {
if (!all.equal(length(es_value_columns), length(es_type_columns), length(es_common_names))) {
stop("Length of es_value_columns, es_type_columns, and es_common_names must be equal")
Expand Down Expand Up @@ -399,7 +399,7 @@ add_common_effect_sizes <- function(fred_data, es_value_columns = c("es_orig_val
#' @param es_replication Character. Name of replication effect size column.
#' @return Augmented FReD dataset with aligned effect directions.

align_effect_direction <- function(fred_data, es_original = "es_original", es_replication = "es_replication") {
align_effect_direction <- function(fred_data, es_original = "es_o", es_replication = "es_r") {
orig_direction <- sign(fred_data[, es_original])
fred_data[, es_original] <- abs(fred_data[, es_original])
fred_data[, es_replication] <- fred_data[, es_replication] * orig_direction
Expand All @@ -421,16 +421,16 @@ align_effect_direction <- function(fred_data, es_original = "es_original", es_re
#'
#' @noRd
#' @examples
#' fred_data <- data.frame(es_original = c(0.3, 0.5), es_replication = c(0.4, 0.6),
#' n_original = c(30, 40), n_replication = c(50, 60))
#' fred_data <- data.frame(es_o = c(0.3, 0.5), es_r = c(0.4, 0.6),
#' n_o = c(30, 40), n_r = c(50, 60))
#' add_uncertainty(fred_data)

add_uncertainty <- function(fred_data, es_value_columns = c("es_original", "es_replication"),
N_columns = c("n_original", "n_replication"),
vi_columns = c("vi_original", "vi_replication"),
ci_lower_columns = c("ci.lower_original", "ci.lower_replication"),
ci_upper_columns = c("ci.upper_original", "ci.upper_replication"),
p_values = c("p_value_original", "p_value_replication")) {
add_uncertainty <- function(fred_data, es_value_columns = c("es_o", "es_r"),
N_columns = c("n_o", "n_r"),
vi_columns = c("vi_o", "vi_r"),
ci_lower_columns = c("ci.lower_o", "ci.lower_r"),
ci_upper_columns = c("ci.upper_o", "ci.upper_r"),
p_values = c("p_value_o", "p_value_r")) {
if (!all.equal(length(es_value_columns), length(N_columns), length(vi_columns), length(ci_lower_columns), length(ci_upper_columns))) {
stop("Length of all column character vectors must be equal")
}
Expand Down Expand Up @@ -476,12 +476,12 @@ add_uncertainty <- function(fred_data, es_value_columns = c("es_original", "es_r
#' @importFrom dplyr mutate case_when

code_replication_outcomes <- function(fred_data,
es_original = "es_original",
p_original = "p_value_original",
p_replication = "p_value_replication",
ci_lower_replication = "ci.lower_replication",
ci_upper_replication = "ci.upper_replication",
es_replication = "es_replication") {
es_original = "es_o",
p_original = "p_value_o",
p_replication = "p_value_r",
ci_lower_replication = "ci.lower_r",
ci_upper_replication = "ci.upper_r",
es_replication = "es_r") {

# Convert column names to symbols for dplyr evaluation
es_original_sym <- dplyr::sym(es_original)
Expand Down Expand Up @@ -529,7 +529,7 @@ code_replication_outcomes <- function(fred_data,
#' @param power_column Character. Name of target column for power.
#' @return Augmented FReD dataset with power column.

add_replication_power <- function(fred_data, es_original = "es_original", N_replication = "n_replication", power_column = "power_r") {
add_replication_power <- function(fred_data, es_original = "es_o", N_replication = "n_r", power_column = "power_r") {
# NA where N_replication is missing
fred_data[, power_column] <- NA
# Return 0 where sample_replication < 4, as pwr.r.test does not work for n < 4
Expand Down Expand Up @@ -587,8 +587,8 @@ p_from_r <- function(r, N) {
#' @param vi_columns Character vector of target columns for sampling variances
#' @return FReD dataset with additional columns for sampling variances (metafor's `vi`)

add_sampling_variances <- function(fred_data, es_value_columns = c("es_original", "es_replication"),
N_columns = c("n_original", "n_replication"), vi_columns = c("vi_original", "vi_replication")) {
add_sampling_variances <- function(fred_data, es_value_columns = c("es_o", "es_r"),
N_columns = c("n_o", "n_r"), vi_columns = c("vi_o", "vi_r")) {
if (!all.equal(length(es_value_columns), length(N_columns))) {
stop("Length of es_value_columns, N_columns and vi_columns must be equal")
}
Expand Down Expand Up @@ -619,19 +619,19 @@ add_sampling_variances <- function(fred_data, es_value_columns = c("es_original"
augment_for_zcurve <- function(fred_data) {

# Ensure fred_data has required columns
if (!all(c("es_original", "n_original") %in% names(fred_data))) {
stop("fred_data must contain es_original and n_original columns")
if (!all(c("es_o", "n_o") %in% names(fred_data))) {
stop("fred_data must contain es_o and n_o columns")
}

# Initialize se and z as NA
fred_data$se <- fred_data$z <- NA

valid_indices <- !(is.na(fred_data$es_original) | is.na(fred_data$n_original) | fred_data$n_original <= 3)
valid_indices <- !(is.na(fred_data$es_o) | is.na(fred_data$n_o) | fred_data$n_o <= 3)

if (any(valid_indices)) {
# Fisher's z transformation
z <- 0.5 * (log(1 + fred_data$es_original[valid_indices]) - log(1 - fred_data$es_original[valid_indices]))
fred_data$se[valid_indices] <- 1 / sqrt(fred_data$n_original[valid_indices] - 3)
z <- 0.5 * (log(1 + fred_data$es_o[valid_indices]) - log(1 - fred_data$es_o[valid_indices]))
fred_data$se[valid_indices] <- 1 / sqrt(fred_data$n_o[valid_indices] - 3)
fred_data$z[valid_indices] <- z / fred_data$se[valid_indices]
}

Expand Down
16 changes: 10 additions & 6 deletions R/run_apps.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,18 +143,22 @@ run_explorer <- function(offer_install = interactive(), in_background = NULL, au

#' Run the Replication Annotator
#'
#' Running this function will launch the FReD Replication Annotator shiny app
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' @return Replication Annotator shiny app
#' @inheritParams run_app
#' This function previously launched a local Shiny app but now opens the
#' hosted web version of the FReD Annotator.
#'
#' @return Opens the web annotator in the default browser (invisibly returns NULL).
#' @export
#' @examples
#' if (interactive()) {
#' # To run the Replication Annotator app:
#' run_annotator()
#' }
run_annotator <- function(offer_install = interactive(), in_background = NULL, auto_close = interactive(), port = 3839, timeout = 30) {
run_app(offer_install = offer_install, app = "fred_annotator", in_background = in_background, auto_close = auto_close, port = port, timeout = timeout)
run_annotator <- function() {
.Deprecated(msg = "The local annotator app has been deprecated. Opening the web version.")
utils::browseURL("http://forrt.org/apps/fred_annotator.html")
invisible(NULL)
}

#' Get the date of last modification
Expand Down
Loading
Loading