65 changes: 24 additions & 41 deletions reports/ny_aeba_grid/notebooks/analysis.qmd
@@ -29,48 +29,31 @@ source("/workspaces/reports2/lib/ggplot/switchbox_theme.R")

```{r}

path_nyiso_hourly_load_csv <- "/workspaces/reports2/data/nyiso/hourly/20151025-20251026 NYISO Hourly Actual Load.csv"
path_nyiso_hourly_load_parquet <- "/workspaces/reports2/data/nyiso/hourly/nyiso_hourly_load.parquet"

# Based on the image, the CSV's columns are: Date, Load, Zone (and probably more for time parsing)
# Let's cleanly parse the timestamp, extract date/hour, and aggregate total load (sum) across all zones per hour

# first look for the parquet, fallback to loading and parsing CSV if not found
if (file.exists(path_nyiso_hourly_load_parquet)) {
nyiso_hourly_load <- read_parquet(path_nyiso_hourly_load_parquet)
} else {
nyiso_hourly_load <- read_csv(path_nyiso_hourly_load_csv)

nyiso_hourly_load <- nyiso_hourly_load |>
# Rename columns to lowercase for easier handling if needed
rename(
datetime = Date,
load = Load,
zone = Zone
) |>
# ensure load is numeric
mutate(load = as.numeric(load)) |>
# Parse the datetime (given as e.g. "10/25/2015 7:00:00 PM" -- note double space between date and hour)
mutate(
datetime = lubridate::mdy_hms(
datetime,
tz = "America/New_York",
quiet = TRUE
),
year = lubridate::year(datetime),
month = lubridate::month(datetime),
day = lubridate::day(datetime),
hour = lubridate::hour(datetime)
) |>
select(-datetime)

# save to parquet
write_parquet(
nyiso_hourly_load,
"/workspaces/reports2/data/nyiso/hourly/nyiso_hourly_load.parquet"
)
}
# Read processed NYISO hourly load data from S3
# Set up S3 filesystem with arrow
s3_bucket <- arrow::s3_bucket(
bucket = "data.sb",
region = "us-west-2"
)

s3_file_path <- "ny_aeba_grid/nyiso/hourly/nyiso_hourly_load.parquet"

nyiso_hourly_load <- tryCatch(
{
arrow::read_parquet(s3_bucket$path(s3_file_path))
},
error = function(e) {
cat("\n")
cat("ERROR: Could not read processed NYISO hourly load data from S3.\n")
cat("Bucket: data.sb\n")
cat("Path:", s3_file_path, "\n")
cat("Error details:", conditionMessage(e), "\n\n")
cat("The processed parquet file may not exist yet, or there may be an AWS credentials issue.\n")
cat("Please run the data processing script first:\n")
cat(" Rscript reports/ny_aeba_grid/utils/make_hourly_nyiso_load.R\n\n")
stop("Missing required data file on S3", call. = FALSE)
}
)

# add a "NY_STATE" zone, which is the sum of all zones
nyiso_monthly_peak_load <- nyiso_hourly_load |>
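Note on the AWS-credentials failure mode flagged in the `tryCatch` above: arrow's S3 filesystem resolves credentials through the standard AWS chain (environment variables, shared config/credentials files, instance profiles), and credentials can also be passed explicitly to `arrow::s3_bucket()`. A minimal sketch, assuming the same bucket and region as the notebook (placeholder values only, not part of this PR):

```r
# Option 1: rely on the standard AWS credential chain (env vars / ~/.aws).
Sys.setenv(
  AWS_ACCESS_KEY_ID = "<your-access-key-id>",        # placeholders, not real keys
  AWS_SECRET_ACCESS_KEY = "<your-secret-access-key>",
  AWS_DEFAULT_REGION = "us-west-2"
)
s3_bucket <- arrow::s3_bucket(bucket = "data.sb", region = "us-west-2")

# Option 2: pass credentials explicitly; extra arguments to s3_bucket() are
# forwarded to S3FileSystem$create().
s3_bucket <- arrow::s3_bucket(
  bucket = "data.sb",
  region = "us-west-2",
  access_key = Sys.getenv("AWS_ACCESS_KEY_ID"),
  secret_key = Sys.getenv("AWS_SECRET_ACCESS_KEY")
)
```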
65 changes: 65 additions & 0 deletions reports/ny_aeba_grid/utils/make_hourly_nyiso_load.R
@@ -0,0 +1,65 @@
#!/usr/bin/env Rscript

# Utility script to process NYISO hourly load data from S3
# Reads raw CSV, processes it, and uploads parquet to S3

library(tidyverse)
library(arrow)
library(lubridate)

# Set up S3 filesystem
s3_bucket <- arrow::s3_bucket(
bucket = "data.sb",
region = "us-west-2"
)

# S3 file paths (without bucket prefix)
s3_csv_file <- "ny_aeba_grid/nyiso/hourly/20151025-20251026 NYISO Hourly Actual Load.csv"
s3_parquet_file <- "ny_aeba_grid/nyiso/hourly/nyiso_hourly_load.parquet"

cat("Starting NYISO hourly load data processing...\n")
cat("Reading CSV from S3: data.sb/", s3_csv_file, "\n", sep = "")

# Read CSV from S3
nyiso_hourly_load <- arrow::read_csv_arrow(s3_bucket$path(s3_csv_file))

cat("CSV loaded. Processing data...\n")

# Apply transformations
nyiso_hourly_load <- nyiso_hourly_load |>
# Rename columns to lowercase for easier handling if needed
rename(
datetime = Date,
load = Load,
zone = Zone
) |>
# ensure load is numeric
mutate(load = as.numeric(load)) |>
# Parse the datetime (given as e.g. "10/25/2015 7:00:00 PM" -- note double space between date and hour)
mutate(
datetime = lubridate::mdy_hms(
datetime,
tz = "America/New_York",
quiet = TRUE
),
year = lubridate::year(datetime),
month = lubridate::month(datetime),
day = lubridate::day(datetime),
hour = lubridate::hour(datetime)
) |>
select(-datetime)

cat("Data processed successfully.\n")
cat("Writing parquet to S3: data.sb/", s3_parquet_file, "\n", sep = "")

# Write parquet to S3
arrow::write_parquet(
nyiso_hourly_load,
s3_bucket$path(s3_parquet_file)
)

cat("✓ Parquet file uploaded to S3 successfully!\n")
cat("Summary:\n")
cat(" - Total rows:", nrow(nyiso_hourly_load), "\n")
cat(" - Columns:", paste(names(nyiso_hourly_load), collapse = ", "), "\n")
cat(" - Year range:", min(nyiso_hourly_load$year), "to", max(nyiso_hourly_load$year), "\n")
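A quick read-back check after running the script (a hedged sketch using the same bucket and path as above, not part of this PR) is to open the uploaded parquet from S3 and confirm the row count and columns match the summary the script prints:

```r
library(arrow)

# Re-open the bucket and read back the file the script just wrote.
s3_bucket <- arrow::s3_bucket(bucket = "data.sb", region = "us-west-2")
check <- arrow::read_parquet(
  s3_bucket$path("ny_aeba_grid/nyiso/hourly/nyiso_hourly_load.parquet")
)

cat("Rows read back:", nrow(check), "\n")
cat("Columns:", paste(names(check), collapse = ", "), "\n")
```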
2 changes: 1 addition & 1 deletion reports/ri_hp_rates/notebooks/analysis.qmd
@@ -39,7 +39,7 @@ path_heat_pump_plots <- file.path(path_to_lib, "rates_analysis", "heat_pump_rate
path_create_housing_units <- file.path(path_to_lib, "rates_analysis", "create_sb_housing_units.R")

# Data paths
path_monthly_data <- file.path(path_to_data, "resstock", "2024_release2_tmy3", "load_curve_monthly")
path_monthly_data <- file.path(path_to_data, "resstock", "2024_release2_tmy3_2", "load_curve_monthly")
path_supply_year_metadata_dir <- file.path(path_to_data, "resstock", "2024_release2_tmy3", "metadata")
path_fuel_oil_supply_rates <- file.path(path_to_data, "eia", "heating_oil", "ri_eia_heating_oil_prices_monthly.parquet")
path_propane_supply_rates <- file.path(path_to_data, "eia", "propane", "ri_eia_propane_prices_monthly.parquet")