RTIInternational · szimmer · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -7,3 +7,5 @@
 ^_pkgdown\.yml$
 ^docs$
 ^pkgdown$
+^\.positai$
+^\.claude$
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,4 @@
 .DS_Store
 .quarto
 docs
+.positai
diff --git a/NAMESPACE b/NAMESPACE
@@ -17,4 +17,5 @@ importFrom(data.table,.NGRP)
 importFrom(data.table,.SD)
 importFrom(data.table,data.table)
 importFrom(rlang,.data)
+importFrom(rlang,.env)
 importFrom(stats,runif)
diff --git a/R/SampleSelectR-package.R b/R/SampleSelectR-package.R
@@ -11,7 +11,8 @@
 #' @importFrom data.table .SD
 #' @importFrom data.table :=
 #' @importFrom data.table data.table
-#' @importFrom stats runif
 #' @importFrom rlang .data
+#' @importFrom rlang .env
+#' @importFrom stats runif
 ## usethis namespace: end
 NULL
diff --git a/R/allocate.R b/R/allocate.R
@@ -279,7 +279,10 @@ allocate <- function(allocation, N.h, n.samp = NULL, S.h = NULL, c.h = NULL, cos
     }
   }
   # Calculate the total (raw) sample size
-  n <- max(ceiling(sum(allocations)), length(N.h) * lbound)
+
+
+  final_n <- max(ceiling(sum(allocations)), length(N.h) * lbound)
+
 
   sizes <- allocations
 
@@ -292,7 +295,7 @@ allocate <- function(allocation, N.h, n.samp = NULL, S.h = NULL, c.h = NULL, cos
   total_allocated <- sum(adjusted_allocations)
 
   # Calculate the difference from the total n
-  difference <- n - total_allocated
+  difference <- final_n - total_allocated
 
   # If difference is positive, distribute it proportionally
   if (difference != 0) {
@@ -314,7 +317,7 @@ allocate <- function(allocation, N.h, n.samp = NULL, S.h = NULL, c.h = NULL, cos
 
   # Step 5: Adjust the rounded allocations to ensure the sum equals n
   total_allocated <- sum(rounded_allocations)
-  difference <- n - total_allocated
+  difference <- final_n - total_allocated
   # Adjust the allocations by adding/subtracting the difference
   while (difference != 0) {
     i <- sample(1:num_groups, 1) # Randomly select an index
@@ -345,7 +348,7 @@ allocate <- function(allocation, N.h, n.samp = NULL, S.h = NULL, c.h = NULL, cos
   }
   outputs <- as.integer(rounded_allocations)
   if (allocation == "optimal") {
-    n.print <- n
+    n.print <- final_n
   } else {
     n.print <- n.samp
   }

diff --git a/R/chromy_pps.R b/R/chromy_pps.R
@@ -53,8 +53,8 @@ chromy_pps <- function(frame, n, mos, outall = FALSE, curstrat = NULL) {
   frame_hits <-
     frame |>
     tidytable::mutate(
-      ExpectedHits = exphits,
-      NumberHits = chromy_inner(exphits),
+      ExpectedHits = .env$exphits,
+      NumberHits = chromy_inner(.env$exphits),
       SelectionIndicator = .data$NumberHits > 0,
       SamplingWeight = ifelse(.data$SelectionIndicator, 1 / .data$ExpectedHits, NA),
     )

diff --git a/R/srs.R b/R/srs.R
@@ -28,25 +28,30 @@
 
 #' @export
 
+
 srs <- function(frame, n, outall = FALSE, curstrat = NULL) {
   check_frame_type(frame)
   check_n(n, frame, curstrat, n_le_N = TRUE)
   check_outall(outall)
 
   N <- nrow(frame)
 
-  # Take the srs and create sampling-related columns
+  # Take the SRS and create sampling-related columns
   selectedVector <- sample(x = N, size = n, replace = FALSE)
+
   frame <- frame |>
     tidytable::mutate(
       rowNum = tidytable::row_number(),
-      SelectionProbability = n / N,
-      SamplingWeight = ifelse(.data$rowNum %in% selectedVector, N / n, NA),
-      SelectionIndicator = ifelse(.data$rowNum %in% selectedVector, TRUE, FALSE)
+      SelectionProbability = .env$n / .env$N,
+      SamplingWeight = tidytable::if_else(
+        .data$rowNum %in% .env$selectedVector,
+        .env$N / .env$n,
+        NA_real_
+      ),
+      SelectionIndicator = .data$rowNum %in% .env$selectedVector
     ) |>
     tidytable::select(-tidytable::all_of("rowNum"))
 
-
   # Output to screen
   Sampling_Output(n, N, curstrat = curstrat)
 
@@ -55,8 +60,9 @@ srs <- function(frame, n, outall = FALSE, curstrat = NULL) {
     sample <- frame |>
       tidytable::filter(.data$SelectionIndicator) |>
       tidytable::select(-tidytable::all_of("SelectionIndicator"))
+
     return(sample)
-  } else if (outall) {
+  } else {
     return(frame)
   }
 }
diff --git a/R/sys.R b/R/sys.R
@@ -34,6 +34,7 @@
 #'
 #' @export
 
+
 sys <- function(frame, n, curstrat = NULL, outall = FALSE) {
   check_frame_type(frame)
   check_n(n, frame, curstrat, n_le_N = TRUE)
@@ -42,32 +43,32 @@ sys <- function(frame, n, curstrat = NULL, outall = FALSE) {
   N <- nrow(frame)
 
   # Sampling method
-
   k <- N / n # Sampling interval
 
-  r <- runif(1, 1, k) # We use a random start between 1 and k
+  r <- runif(1, 1, k) # Random start between 1 and k
 
   selectedVector <- floor(r + k * (0:(n - 1))) # Selected row indices
 
-  # We make sure that selected indices are within frame range
-
+  # Make sure selected indices are within frame range
   selectedVector <- selectedVector[selectedVector <= N]
 
-  # Creating variables accordingly
+  # Create variables
   frame <- frame |>
     tidytable::mutate(
       numrow = tidytable::row_number(),
-      SelectionIndicator = .data$numrow %in% selectedVector,
-      SelectionProbability = n / N,
-      SamplingWeight = ifelse(.data$SelectionIndicator, N / n, NA)
+      SelectionIndicator = .data$numrow %in% .env$selectedVector,
+      SelectionProbability = .env$n / .env$N,
+      SamplingWeight = tidytable::if_else(
+        .data$SelectionIndicator,
+        .env$N / .env$n,
+        NA_real_
+      )
     ) |>
     tidytable::select(-tidytable::all_of("numrow"))
 
   # Output to screen
   Sampling_Output(n, N, k = k, r = r, curstrat = curstrat)
-
-  # Return only selected rows and make sure the selected sample is a data.frame, tibble, or data.table
-
+  # Return only the sample or the frame with selection indicator based on value of outall
   if (outall) {
     return(frame)
   } else {

diff --git a/R/sys_pps.R b/R/sys_pps.R
@@ -47,7 +47,7 @@ sys_pps <- function(frame, n, mos, outall = FALSE, curstrat = NULL) {
   tbd_data_1 <- frame |>
     tidytable::mutate(
       rowNum = tidytable::row_number(),
-      ExpectedHits = n * (!!(symbol_mos) / totalSize),
+      ExpectedHits = .env$n * (!!(symbol_mos) / .env$totalSize),
       SamplingWeight = .data$ExpectedHits^-1
     )
 
@@ -64,11 +64,12 @@ sys_pps <- function(frame, n, mos, outall = FALSE, curstrat = NULL) {
   selectedVector <- findInterval(selectedSizePoints, sizeIntervals)
 
   # Using selectedVector, get the total counts of each index
+
   selectedVector_counts <- selectedVector |>
     as.data.frame() |>
-    tidytable::count(selectedVector) |>
-    # Rename to NumberHits
-    tidytable::rename(NumberHits = n)
+    tidytable::count(selectedVector, name = "NumberHits")
+
+
 
   tbd_data_2 <- tbd_data_1 |>
     tidytable::left_join(
@@ -78,7 +79,7 @@ sys_pps <- function(frame, n, mos, outall = FALSE, curstrat = NULL) {
     # Need to zero filled NumberHits
     tidytable::mutate(
       NumberHits = tidytable::replace_na(.data$NumberHits, replace = 0),
-      SelectionIndicator = .data$rowNum %in% selectedVector,
+      SelectionIndicator = .data$rowNum %in% .env$selectedVector,
       # Make SamplingWeight to be NA if not selected
       SamplingWeight = tidytable::case_when(
         SelectionIndicator == TRUE ~ .data$SamplingWeight,

diff --git a/R/util.R b/R/util.R
@@ -1,3 +1,4 @@
+
 #' Check if the frame is a valid data structure
 #'
 #' Ensures that the input frame is a data.frame, data.table, or tibble.

diff --git a/man/SampleSelectR-package.Rd b/man/SampleSelectR-package.Rd
diff --git a/tests/testthat/test-select_sample.R b/tests/testthat/test-select_sample.R
@@ -0,0 +1,147 @@
+test_that("n on frame does not cause issue - chromy_pps", {
+  county_2023_slim_n <- county_2023 |>
+    tidytable::select(GEOID, Region, Pop_Tot) |>
+    tidytable::mutate(
+      n = 50,
+      ExpHits_man = 10 * Pop_Tot / sum(Pop_Tot),
+      .by = "Region"
+    )
+
+  sampsizes <- county_2023_slim_n |>
+    tidytable::distinct(Region) |>
+    tidytable::mutate(sample_size = 10)
+
+  set.seed(12345)
+  samp1 <- county_2023_slim_n |>
+    select_sample(
+      "chromy_pps",
+      n = sampsizes,
+      strata = "Region",
+      mos = "Pop_Tot",
+      outall = TRUE
+    )
+  set.seed(12345)
+  samp2 <- county_2023_slim_n |>
+    tidytable::select(-n) |>
+    select_sample(
+      "chromy_pps",
+      n = sampsizes,
+      strata = "Region",
+      mos = "Pop_Tot",
+      outall = TRUE
+    )
+  expect_equal(
+    samp1 |> tidytable::select(-n),
+    samp2
+  )
+})
+
+test_that("n on frame does not cause issue - srs", {
+  county_2023_slim_n <- county_2023 |>
+    tidytable::select(GEOID, Region, Pop_Tot) |>
+    tidytable::mutate(
+      n = 50,
+      ExpHits_man = 10 * Pop_Tot / sum(Pop_Tot),
+      .by = "Region"
+    )
+
+  sampsizes <- county_2023_slim_n |>
+    tidytable::distinct(Region) |>
+    tidytable::mutate(sample_size = 10)
+
+  set.seed(12345)
+  samp1 <- county_2023_slim_n |>
+    select_sample(
+      "srs",
+      n = sampsizes,
+      strata = "Region",
+      outall = TRUE
+    )
+  set.seed(12345)
+  samp2 <- county_2023_slim_n |>
+    tidytable::select(-n) |>
+    select_sample(
+      "srs",
+      n = sampsizes,
+      strata = "Region",
+      outall = TRUE
+    )
+  expect_equal(
+    samp1 |> tidytable::select(-n),
+    samp2
+  )
+})
+
+test_that("n on frame does not cause issue - sys_pps", {
+  county_2023_slim_n <- county_2023 |>
+    tidytable::select(GEOID, Region, Pop_Tot) |>
+    tidytable::mutate(
+      n = 50,
+      ExpHits_man = 10 * Pop_Tot / sum(Pop_Tot),
+      .by = "Region"
+    )
+
+  sampsizes <- county_2023_slim_n |>
+    tidytable::distinct(Region) |>
+    tidytable::mutate(sample_size = 10)
+
+  set.seed(12345)
+  samp1 <- county_2023_slim_n |>
+    select_sample(
+      "sys_pps",
+      n = sampsizes,
+      strata = "Region",
+      mos = "Pop_Tot",
+      outall = TRUE
+    )
+  set.seed(12345)
+  samp2 <- county_2023_slim_n |>
+    tidytable::select(-n) |>
+    select_sample(
+      "sys_pps",
+      n = sampsizes,
+      strata = "Region",
+      mos = "Pop_Tot",
+      outall = TRUE
+    )
+  expect_equal(
+    samp1 |> tidytable::select(-n),
+    samp2
+  )
+})
+
+test_that("n on frame does not cause issue - sys", {
+  county_2023_slim_n <- county_2023 |>
+    tidytable::select(GEOID, Region, Pop_Tot) |>
+    tidytable::mutate(
+      n = 50,
+      ExpHits_man = 10 * Pop_Tot / sum(Pop_Tot),
+      .by = "Region"
+    )
+
+  sampsizes <- county_2023_slim_n |>
+    tidytable::distinct(Region) |>
+    tidytable::mutate(sample_size = 10)
+
+  set.seed(12345)
+  samp1 <- county_2023_slim_n |>
+    select_sample(
+      "sys",
+      n = sampsizes,
+      strata = "Region",
+      outall = TRUE
+    )
+  set.seed(12345)
+  samp2 <- county_2023_slim_n |>
+    tidytable::select(-n) |>
+    select_sample(
+      "sys",
+      n = sampsizes,
+      strata = "Region",
+      outall = TRUE
+    )
+  expect_equal(
+    samp1 |> tidytable::select(-n),
+    samp2
+  )
+})
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,3 +7,4 @@ @@
     .DS_Store
     .quarto
     docs
+    .positai