Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dependencies:
# Running HSP2
- scipy # Scipy also installs numpy
# Pandas installs most scientific Python modules, such as Numpy, etc.
- pandas
- pandas <3.0.0
- numba
- numpy
- hdf5
Expand Down
88 changes: 88 additions & 0 deletions examples/pretest/cmd_regression.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@timcera @PaulDudaRESPEC can we get a couple more sets of eyes on this file? Robert and I talked about this and I left some notes/questions/comments/concerns in #216. I'm not sure why it's in this pandas3 prep PR, maybe Robert can chime in about how it relates to pandas 3 or maybe we should move it to its own PR if it it's important to Robert. Moving this to its own PR could help us all collaborate and understand the need for this file, why it's a good example, and then how to best share/communicate the example to the public in our repo.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@austinorr @timcera @PaulDudaRESPEC

It's a testing file. I know it is useful for development, and having sample dev code synchronously in git record with the developments that are happening IS actually a best practice IMO.

Especially since we are firmly in the development stage, not release stage, trying to eliminate every file that conflicts with goals of directory tree purity seems onerous and discourages development.

Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Note: This code must be run from the tests dir due to importing
# the `convert` directory where the regression_base file lives
# todo: make this convert path passable by argument
import numpy as np
from convert.regression_base import RegressTest

case = "test10specl"
base_case = "test10"
#case = "testcbp"
tdir = "/opt/model/HSPsquared/tests"
#import_uci(str(hsp2_specl_uci), str(temp_specl_h5file))
#run(temp_specl_h5file, saveall=True, compress=False)

############# RegressTest data loader
## TEST CASE
test = RegressTest(case, threads=1)
# test object hydr
rchres_hydr_hsp2_test_table = test.hsp2_data._read_table('RCHRES', '001', 'HYDR')
perlnd_pwat_hsp2_test_table = test.hsp2_data._read_table('PERLND', '001', 'PWATER')
rchres_hydr_hsp2_test = test.hsp2_data.get_time_series('RCHRES', '001', 'RO', 'HYDR')
rchres_hydr_hspf_test = test.get_hspf_time_series( ('RCHRES', 'HYDR', '001', 'RO', 2)) #Note: the order of arguments is wonky in Regressbase
rchres_hydr_hsp2_test_mo = rchres_hydr_hsp2_test.resample('MS').mean()
rchres_hydr_hspf_test_mo = rchres_hydr_hspf_test.resample('MS').mean()
## BASE CASE
base = RegressTest(base_case, threads=1)
# base object hydr
rchres_hydr_hsp2_base_table = base.hsp2_data._read_table('RCHRES', '001', 'HYDR')
perlnd_pwat_hsp2_base_table = base.hsp2_data._read_table('PERLND', '001', 'PWATER')
rchres_hydr_hsp2_base = base.hsp2_data.get_time_series('RCHRES', '001', 'RO', 'HYDR')
rchres_hydr_hspf_base = base.get_hspf_time_series( ('RCHRES', 'HYDR', '001', 'RO', 2)) #Note: the order of arguments is wonky in Regressbase
rchres_hydr_hsp2_base_mo = rchres_hydr_hsp2_base.resample('MS').mean()
rchres_hydr_hspf_base_mo = rchres_hydr_hspf_base.resample('MS').mean()

# Show quantiles
print("hsp2", case, np.quantile(rchres_hydr_hsp2_test, [0,0.25,0.5,0.75,1.0]))
print("hspf", case, np.quantile(rchres_hydr_hspf_test, [0,0.25,0.5,0.75,1.0]))
print("hsp2", base_case, np.quantile(rchres_hydr_hsp2_base, [0,0.25,0.5,0.75,1.0]))
print("hspf", base_case, np.quantile(rchres_hydr_hspf_base, [0,0.25,0.5,0.75,1.0]))
# Monthly mean value comparisons
rchres_hydr_hsp2_test_mo
rchres_hydr_hspf_test_mo
rchres_hydr_hsp2_base_mo
rchres_hydr_hspf_base_mo

# Compare ANY arbitrary timeseries, not just the ones coded into the RegressTest object
# 3rd argument is tolerance to use
tol = 10.0
test.compare_time_series(rchres_hydr_hsp2_base_table['RO'], rchres_hydr_hsp2_test_table['RO'], tol)
# Example: (True, 8.7855425)

# Compare inflows and outflows
np.mean(perlnd_pwat_hsp2_test_table['PERO']) * 6000 * 0.0833
np.mean(rchres_hydr_hsp2_test_table['IVOL'])
np.mean(rchres_hydr_hsp2_test_table['ROVOL'])
np.mean(perlnd_pwat_hsp2_base_table['PERO']) * 6000 * 0.0833
np.mean(rchres_hydr_hsp2_base_table['IVOL'])
np.mean(rchres_hydr_hsp2_base_table['ROVOL'])

# now do a comparison
# HYDR diff should be almost nonexistent
test.check_con(params = ('RCHRES', 'HYDR', '001', 'ROVOL', 2))
# this is very large for PWTGAS
# git: test10specl ('PERLND', 'PWTGAS', '001', 'POHT', '2') 1163640%
test.check_con(params = ('PERLND', 'PWTGAS', '001', 'POHT', '2'))
# Other mismatches in PERLND
test.check_con(params = ('PERLND', 'PWATER', '001', 'AGWS', '2'))
test.check_con(params = ('PERLND', 'PWATER', '001', 'PERO', '2'))
# Now run the full test
test.quiet = True # this lets us test without overwhelming the console
results = test.run_test()
found = False
mismatches = []
for key, results in results.items():
no_data_hsp2, no_data_hspf, match, diff = results
if any([no_data_hsp2, no_data_hspf]):
continue
if not match:
mismatches.append((case, key, results))
found = True

print(mismatches)

if mismatches:
for case, key, results in mismatches:
diff = results
print(case, key, f"{diff:0.00%}")
else:
print("No mismatches found. Success!")
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies = [
"cltoolbox",
"numba",
"numpy<2.0",
"pandas",
"pandas<3.0.0",
"tables",
"pyparsing"
]
Expand Down
12 changes: 6 additions & 6 deletions src/hsp2/hsp2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,23 +704,23 @@ def get_flows(
t = data_frame[smemn].astype(float64).to_numpy()[0:steps]

if MFname in ts and AFname in ts:
t *= ts[MFname][:steps] * ts[AFname][0:steps]
t = t * ts[MFname][:steps] * ts[AFname][0:steps]
msg(4, f"MFACTOR modified by timeseries {MFname}")
msg(4, f"AFACTR modified by timeseries {AFname}")
elif MFname in ts:
t *= afactr * ts[MFname][0:steps]
t = t * afactr * ts[MFname][0:steps]
msg(4, f"MFACTOR modified by timeseries {MFname}")
elif AFname in ts:
t *= mfactor * ts[AFname][0:steps]
t = t * mfactor * ts[AFname][0:steps]
msg(4, f"AFACTR modified by timeseries {AFname}")
else:
t *= factor
t = t * factor

# if poht to iheat, imprecision in hspf conversion factor requires a slight adjustment
if (smemn == "POHT" or smemn == "SOHT") and tmemn == "IHEAT":
t *= 0.998553
t = t * 0.998553
if (smemn == "PODOXM" or smemn == "SODOXM") and tmemn == "OXIF1":
t *= 1.000565
t = t * 1.000565

# ??? ISSUE: can fetched data be at different frequency - don't know how to transform.
if tmemn in ts:
Expand Down
86 changes: 47 additions & 39 deletions src/hsp2/hsp2/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,46 +214,47 @@ def transform(ts, name, how, siminfo):
"""

tsfreq = ts.index.freq
freq = Minute(siminfo["delt"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could all these changes be avoided by calling .to_unit(‘ns’) here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I volunteer to test this alternative so we don’t have to change the primary semantics of our ‘freq’ variable and add a new separate one just to drive resampling.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rburghol what about this idea in #221? It modifies far fewer lines, uses the same pattern in each function, and lets the variable freq be used in both comparisons and in the resample call.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tests on that PR currently fail on purpose, since pandas<3.0 is not merged yet.

fmins = Minute(siminfo["delt"])
freq = fmins.nanos
stop = siminfo["stop"]

# append duplicate of last point to force processing last full interval
if ts.index[-1] < stop:
ts[stop] = ts.iloc[-1]

if freq == tsfreq:
if freq == tsfreq.nanos:
pass
elif tsfreq is None: # Sparse time base, frequency not defined
ts = ts.reindex(siminfo["tbase"]).ffill().bfill()
elif how == "SAME":
ts = ts.resample(freq).ffill() # tsfreq >= freq assumed, or bad user choice
ts = ts.resample(fmins).ffill() # tsfreq.nanos >= freq assumed, or bad user choice
elif not how:
if name in flowtype:
if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq > freq:
if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq.nanos > freq:
if "M" in str(tsfreq):
ratio = 1.0 / 730.5
elif "Y" in str(tsfreq):
ratio = 1.0 / 8766.0
else:
ratio = freq / tsfreq
ts = (ratio * ts).resample(freq).ffill() # HSP2 how = div
ratio = freq / tsfreq.nanos
ts = (ratio * ts).resample(fmins).ffill() # HSP2 how = div
else:
ts = ts.resample(freq).sum()
ts = ts.resample(fmins).sum()
else:
if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq > freq:
ts = ts.resample(freq).ffill()
if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq.nanos > freq:
ts = ts.resample(fmins).ffill()
else:
ts = ts.resample(freq).mean()
ts = ts.resample(fmins).mean()
elif how == "MEAN":
ts = ts.resample(freq).mean()
ts = ts.resample(fmins).mean()
elif how == "SUM":
ts = ts.resample(freq).sum()
ts = ts.resample(fmins).sum()
elif how == "MAX":
ts = ts.resample(freq).max()
ts = ts.resample(fmins).max()
elif how == "MIN":
ts = ts.resample(freq).min()
ts = ts.resample(fmins).min()
elif how == "LAST":
ts = ts.resample(freq).ffill()
ts = ts.resample(fmins).ffill()
elif how == "DIV":
if "Y" in str(tsfreq) or "M" in str(tsfreq):
mult = 1
Expand All @@ -267,14 +268,14 @@ def transform(ts, name, how, siminfo):
elif "Y" in str(tsfreq):
ratio = 1.0 / (8766.0 * mult)
else:
ratio = freq / tsfreq
ts = (ratio * ts).resample(freq).ffill() # HSP2 how = div
ratio = freq / tsfreq.nanos
ts = (ratio * ts).resample(fmins).ffill() # HSP2 how = div
else:
ts = (ts * (freq / ts.index.freq)).resample(freq).ffill()
ts = (ts * (freq / tsfreq.nanos)).resample(fmins).ffill()
elif how == "ZEROFILL":
ts = ts.resample(freq).fillna(0.0)
ts = ts.resample(fmins).fillna(0.0)
elif how == "INTERPOLATE":
ts = ts.resample(freq).interpolate()
ts = ts.resample(fmins).interpolate()
else:
print(f"UNKNOWN method in TRANS, {how}")
return zeros(1)
Expand All @@ -287,7 +288,8 @@ def hoursval(siminfo, hours24, dofirst=False, lapselike=False):
"""create hours flags, flag on the hour or lapse table over full simulation"""
start = siminfo["start"]
stop = siminfo["stop"]
freq = Minute(siminfo["delt"])
fmins = Minute(siminfo["delt"])
freq = fmins.nanos

dr = date_range(
start=f"{start.year}-01-01", end=f"{stop.year}-12-31", freq=Minute(60)
Expand All @@ -297,16 +299,17 @@ def hoursval(siminfo, hours24, dofirst=False, lapselike=False):
hours[0] = 1

ts = Series(hours[0 : len(dr)], dr)
tsfreq = ts.index.freq
if lapselike:
if ts.index.freq > freq: # upsample
ts = ts.resample(freq).asfreq().ffill()
elif ts.index.freq < freq: # downsample
ts = ts.resample(freq).mean()
if tsfreq.nanos > freq: # upsample
ts = ts.resample(fmins).asfreq().ffill()
elif tsfreq.nanos < freq: # downsample
ts = ts.resample(fmins).mean()
else:
if ts.index.freq > freq: # upsample
ts = ts.resample(freq).asfreq().fillna(0.0)
elif ts.index.freq < freq: # downsample
ts = ts.resample(freq).max()
if tsfreq.nanos > freq: # upsample
ts = ts.resample(fmins).asfreq().fillna(0.0)
elif tsfreq.nanos < freq: # downsample
ts = ts.resample(fmins).max()
return ts.truncate(start, stop).to_numpy()


Expand All @@ -321,16 +324,18 @@ def monthval(siminfo, monthly):
"""returns value at start of month for all times within the month"""
start = siminfo["start"]
stop = siminfo["stop"]
freq = Minute(siminfo["delt"])
fmins = Minute(siminfo["delt"])
freq = fmins.nanos

months = tile(monthly, stop.year - start.year + 1).astype(float)
dr = date_range(start=f"{start.year}-01-01", end=f"{stop.year}-12-31", freq="MS")
ts = Series(months, index=dr).resample("D").ffill()
tsfreq = ts.index.freq

if ts.index.freq > freq: # upsample
ts = ts.resample(freq).asfreq().ffill()
elif ts.index.freq < freq: # downsample
ts = ts.resample(freq).mean()
if tsfreq.nanos > freq: # upsample
ts = ts.resample(fmins).asfreq().ffill()
elif tsfreq.nanos < freq: # downsample
ts = ts.resample(fmins).mean()
return ts.truncate(start, stop).to_numpy()


Expand All @@ -339,16 +344,19 @@ def dayval(siminfo, monthly):
interpolation to day, but constant within day"""
start = siminfo["start"]
stop = siminfo["stop"]
freq = Minute(siminfo["delt"])
fmins = Minute(siminfo["delt"])
freq = fmins.nanos

months = tile(monthly, stop.year - start.year + 1).astype(float)
dr = date_range(start=f"{start.year}-01-01", end=f"{stop.year}-12-31", freq="MS")
ts = Series(months, index=dr).resample("D").interpolate("time")
tsfreq = ts.index.freq


if ts.index.freq > freq: # upsample
ts = ts.resample(freq).ffill()
elif ts.index.freq < freq: # downsample
ts = ts.resample(freq).mean()
if tsfreq.nanos > freq: # upsample
ts = ts.resample(fmins).ffill()
elif tsfreq.nanos < freq: # downsample
ts = ts.resample(fmins).mean()
return ts.truncate(start, stop).to_numpy()


Expand Down
6 changes: 3 additions & 3 deletions src/hsp2/hsp2tools/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ def run(h5file, saveall=True, compress=True):
[optional] Default is True.
use compression on the save h5 file.
"""
hdf5_instance = HDF5(h5file)
io_manager = IOManager(hdf5_instance)
main(io_manager, saveall=saveall, jupyterlab=compress)
with HDF5(h5file) as hdf5_instance:
io_manager = IOManager(hdf5_instance)
main(io_manager, saveall=saveall, jupyterlab=compress)


def import_uci(ucifile, h5file):
Expand Down
5 changes: 3 additions & 2 deletions tests/convert/regression_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def __init__(
self.tcodes = tcodes
self.ids = ids
self.threads = threads

self.quiet = False # allows users to set this later
self._init_files()

def _init_files(self):
Expand Down Expand Up @@ -185,7 +185,8 @@ def run_test(self) -> Dict[OperationsTuple, ResultsTuple]:
def check_con(self, params: OperationsTuple) -> ResultsTuple:
"""Performs comparision of single constituent"""
operation, activity, id, constituent, tcode = params
print(f" {operation}_{id} {activity} {constituent}\n")
if not self.quiet:
print(f" {operation}_{id} {activity} {constituent}\n")

ts_hsp2 = self.hsp2_data.get_time_series(operation, id, constituent, activity)
ts_hspf = self.get_hspf_time_series(params)
Expand Down