-
Notifications
You must be signed in to change notification settings - Fork 20
Develop pandas3 prep #217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Develop pandas3 prep #217
Changes from all commits
10696cd
d3c1253
924c9af
d105c54
2493af2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| # Note: This code must be run from the tests dir due to importing | ||
| # the `convert` directory where the regression_base file lives | ||
| # todo: make this convert path passable by argument | ||
| import numpy as np | ||
| from convert.regression_base import RegressTest | ||
|
|
||
| case = "test10specl" | ||
| base_case = "test10" | ||
| #case = "testcbp" | ||
| tdir = "/opt/model/HSPsquared/tests" | ||
| #import_uci(str(hsp2_specl_uci), str(temp_specl_h5file)) | ||
| #run(temp_specl_h5file, saveall=True, compress=False) | ||
|
|
||
| ############# RegressTest data loader | ||
| ## TEST CASE | ||
| test = RegressTest(case, threads=1) | ||
| # test object hydr | ||
| rchres_hydr_hsp2_test_table = test.hsp2_data._read_table('RCHRES', '001', 'HYDR') | ||
| perlnd_pwat_hsp2_test_table = test.hsp2_data._read_table('PERLND', '001', 'PWATER') | ||
| rchres_hydr_hsp2_test = test.hsp2_data.get_time_series('RCHRES', '001', 'RO', 'HYDR') | ||
| rchres_hydr_hspf_test = test.get_hspf_time_series( ('RCHRES', 'HYDR', '001', 'RO', 2)) #Note: the order of arguments is wonky in Regressbase | ||
| rchres_hydr_hsp2_test_mo = rchres_hydr_hsp2_test.resample('MS').mean() | ||
| rchres_hydr_hspf_test_mo = rchres_hydr_hspf_test.resample('MS').mean() | ||
| ## BASE CASE | ||
| base = RegressTest(base_case, threads=1) | ||
| # base object hydr | ||
| rchres_hydr_hsp2_base_table = base.hsp2_data._read_table('RCHRES', '001', 'HYDR') | ||
| perlnd_pwat_hsp2_base_table = base.hsp2_data._read_table('PERLND', '001', 'PWATER') | ||
| rchres_hydr_hsp2_base = base.hsp2_data.get_time_series('RCHRES', '001', 'RO', 'HYDR') | ||
| rchres_hydr_hspf_base = base.get_hspf_time_series( ('RCHRES', 'HYDR', '001', 'RO', 2)) #Note: the order of arguments is wonky in Regressbase | ||
| rchres_hydr_hsp2_base_mo = rchres_hydr_hsp2_base.resample('MS').mean() | ||
| rchres_hydr_hspf_base_mo = rchres_hydr_hspf_base.resample('MS').mean() | ||
|
|
||
| # Show quantiles | ||
| print("hsp2", case, np.quantile(rchres_hydr_hsp2_test, [0,0.25,0.5,0.75,1.0])) | ||
| print("hspf", case, np.quantile(rchres_hydr_hspf_test, [0,0.25,0.5,0.75,1.0])) | ||
| print("hsp2", base_case, np.quantile(rchres_hydr_hsp2_base, [0,0.25,0.5,0.75,1.0])) | ||
| print("hspf", base_case, np.quantile(rchres_hydr_hspf_base, [0,0.25,0.5,0.75,1.0])) | ||
| # Monthly mean value comparisons | ||
| rchres_hydr_hsp2_test_mo | ||
| rchres_hydr_hspf_test_mo | ||
| rchres_hydr_hsp2_base_mo | ||
| rchres_hydr_hspf_base_mo | ||
|
|
||
| # Compare ANY arbitrary timeseries, not just the ones coded into the RegressTest object | ||
| # 3rd argument is tolerance to use | ||
| tol = 10.0 | ||
| test.compare_time_series(rchres_hydr_hsp2_base_table['RO'], rchres_hydr_hsp2_test_table['RO'], tol) | ||
| # Example: (True, 8.7855425) | ||
|
|
||
| # Compare inflows and outflows | ||
| np.mean(perlnd_pwat_hsp2_test_table['PERO']) * 6000 * 0.0833 | ||
| np.mean(rchres_hydr_hsp2_test_table['IVOL']) | ||
| np.mean(rchres_hydr_hsp2_test_table['ROVOL']) | ||
| np.mean(perlnd_pwat_hsp2_base_table['PERO']) * 6000 * 0.0833 | ||
| np.mean(rchres_hydr_hsp2_base_table['IVOL']) | ||
| np.mean(rchres_hydr_hsp2_base_table['ROVOL']) | ||
|
|
||
| # now do a comparison | ||
| # HYDR diff should be almost nonexistent | ||
| test.check_con(params = ('RCHRES', 'HYDR', '001', 'ROVOL', 2)) | ||
| # this is very large for PWTGAS | ||
| # git: test10specl ('PERLND', 'PWTGAS', '001', 'POHT', '2') 1163640% | ||
| test.check_con(params = ('PERLND', 'PWTGAS', '001', 'POHT', '2')) | ||
| # Other mismatches in PERLND | ||
| test.check_con(params = ('PERLND', 'PWATER', '001', 'AGWS', '2')) | ||
| test.check_con(params = ('PERLND', 'PWATER', '001', 'PERO', '2')) | ||
| # Now run the full test | ||
| test.quiet = True # this lets us test without overwhelming the console | ||
| results = test.run_test() | ||
| found = False | ||
| mismatches = [] | ||
| for key, results in results.items(): | ||
| no_data_hsp2, no_data_hspf, match, diff = results | ||
| if any([no_data_hsp2, no_data_hspf]): | ||
| continue | ||
| if not match: | ||
| mismatches.append((case, key, results)) | ||
| found = True | ||
|
|
||
| print(mismatches) | ||
|
|
||
| if mismatches: | ||
| for case, key, results in mismatches: | ||
| diff = results | ||
| print(case, key, f"{diff:0.00%}") | ||
| else: | ||
| print("No mismatches found. Success!") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -214,46 +214,47 @@ def transform(ts, name, how, siminfo): | |
| """ | ||
|
|
||
| tsfreq = ts.index.freq | ||
| freq = Minute(siminfo["delt"]) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could all these changes be avoided by calling
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I volunteer to test this alternative so we don’t have to change the primary semantics of our ‘freq’ variable and add a new separate one just to drive resampling.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests on that PR currently fail on purpose, since pandas<3.0 is not merged yet. |
||
| fmins = Minute(siminfo["delt"]) | ||
| freq = fmins.nanos | ||
| stop = siminfo["stop"] | ||
|
|
||
| # append duplicate of last point to force processing last full interval | ||
| if ts.index[-1] < stop: | ||
| ts[stop] = ts.iloc[-1] | ||
|
|
||
| if freq == tsfreq: | ||
| if freq == tsfreq.nanos: | ||
| pass | ||
| elif tsfreq is None: # Sparse time base, frequency not defined | ||
| ts = ts.reindex(siminfo["tbase"]).ffill().bfill() | ||
| elif how == "SAME": | ||
| ts = ts.resample(freq).ffill() # tsfreq >= freq assumed, or bad user choice | ||
| ts = ts.resample(fmins).ffill() # tsfreq.nanos >= freq assumed, or bad user choice | ||
| elif not how: | ||
| if name in flowtype: | ||
| if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq > freq: | ||
| if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq.nanos > freq: | ||
| if "M" in str(tsfreq): | ||
| ratio = 1.0 / 730.5 | ||
| elif "Y" in str(tsfreq): | ||
| ratio = 1.0 / 8766.0 | ||
| else: | ||
| ratio = freq / tsfreq | ||
| ts = (ratio * ts).resample(freq).ffill() # HSP2 how = div | ||
| ratio = freq / tsfreq.nanos | ||
| ts = (ratio * ts).resample(fmins).ffill() # HSP2 how = div | ||
| else: | ||
| ts = ts.resample(freq).sum() | ||
| ts = ts.resample(fmins).sum() | ||
| else: | ||
| if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq > freq: | ||
| ts = ts.resample(freq).ffill() | ||
| if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq.nanos > freq: | ||
| ts = ts.resample(fmins).ffill() | ||
| else: | ||
| ts = ts.resample(freq).mean() | ||
| ts = ts.resample(fmins).mean() | ||
| elif how == "MEAN": | ||
| ts = ts.resample(freq).mean() | ||
| ts = ts.resample(fmins).mean() | ||
| elif how == "SUM": | ||
| ts = ts.resample(freq).sum() | ||
| ts = ts.resample(fmins).sum() | ||
| elif how == "MAX": | ||
| ts = ts.resample(freq).max() | ||
| ts = ts.resample(fmins).max() | ||
| elif how == "MIN": | ||
| ts = ts.resample(freq).min() | ||
| ts = ts.resample(fmins).min() | ||
| elif how == "LAST": | ||
| ts = ts.resample(freq).ffill() | ||
| ts = ts.resample(fmins).ffill() | ||
| elif how == "DIV": | ||
| if "Y" in str(tsfreq) or "M" in str(tsfreq): | ||
| mult = 1 | ||
|
|
@@ -267,14 +268,14 @@ def transform(ts, name, how, siminfo): | |
| elif "Y" in str(tsfreq): | ||
| ratio = 1.0 / (8766.0 * mult) | ||
| else: | ||
| ratio = freq / tsfreq | ||
| ts = (ratio * ts).resample(freq).ffill() # HSP2 how = div | ||
| ratio = freq / tsfreq.nanos | ||
| ts = (ratio * ts).resample(fmins).ffill() # HSP2 how = div | ||
| else: | ||
| ts = (ts * (freq / ts.index.freq)).resample(freq).ffill() | ||
| ts = (ts * (freq / tsfreq.nanos)).resample(fmins).ffill() | ||
| elif how == "ZEROFILL": | ||
| ts = ts.resample(freq).fillna(0.0) | ||
| ts = ts.resample(fmins).fillna(0.0) | ||
| elif how == "INTERPOLATE": | ||
| ts = ts.resample(freq).interpolate() | ||
| ts = ts.resample(fmins).interpolate() | ||
| else: | ||
| print(f"UNKNOWN method in TRANS, {how}") | ||
| return zeros(1) | ||
|
|
@@ -287,7 +288,8 @@ def hoursval(siminfo, hours24, dofirst=False, lapselike=False): | |
| """create hours flags, flag on the hour or lapse table over full simulation""" | ||
| start = siminfo["start"] | ||
| stop = siminfo["stop"] | ||
| freq = Minute(siminfo["delt"]) | ||
| fmins = Minute(siminfo["delt"]) | ||
| freq = fmins.nanos | ||
|
|
||
| dr = date_range( | ||
| start=f"{start.year}-01-01", end=f"{stop.year}-12-31", freq=Minute(60) | ||
|
|
@@ -297,16 +299,17 @@ def hoursval(siminfo, hours24, dofirst=False, lapselike=False): | |
| hours[0] = 1 | ||
|
|
||
| ts = Series(hours[0 : len(dr)], dr) | ||
| tsfreq = ts.index.freq | ||
| if lapselike: | ||
| if ts.index.freq > freq: # upsample | ||
| ts = ts.resample(freq).asfreq().ffill() | ||
| elif ts.index.freq < freq: # downsample | ||
| ts = ts.resample(freq).mean() | ||
| if tsfreq.nanos > freq: # upsample | ||
| ts = ts.resample(fmins).asfreq().ffill() | ||
| elif tsfreq.nanos < freq: # downsample | ||
| ts = ts.resample(fmins).mean() | ||
| else: | ||
| if ts.index.freq > freq: # upsample | ||
| ts = ts.resample(freq).asfreq().fillna(0.0) | ||
| elif ts.index.freq < freq: # downsample | ||
| ts = ts.resample(freq).max() | ||
| if tsfreq.nanos > freq: # upsample | ||
| ts = ts.resample(fmins).asfreq().fillna(0.0) | ||
| elif tsfreq.nanos < freq: # downsample | ||
| ts = ts.resample(fmins).max() | ||
| return ts.truncate(start, stop).to_numpy() | ||
|
|
||
|
|
||
|
|
@@ -321,16 +324,18 @@ def monthval(siminfo, monthly): | |
| """returns value at start of month for all times within the month""" | ||
| start = siminfo["start"] | ||
| stop = siminfo["stop"] | ||
| freq = Minute(siminfo["delt"]) | ||
| fmins = Minute(siminfo["delt"]) | ||
| freq = fmins.nanos | ||
|
|
||
| months = tile(monthly, stop.year - start.year + 1).astype(float) | ||
| dr = date_range(start=f"{start.year}-01-01", end=f"{stop.year}-12-31", freq="MS") | ||
| ts = Series(months, index=dr).resample("D").ffill() | ||
| tsfreq = ts.index.freq | ||
|
|
||
| if ts.index.freq > freq: # upsample | ||
| ts = ts.resample(freq).asfreq().ffill() | ||
| elif ts.index.freq < freq: # downsample | ||
| ts = ts.resample(freq).mean() | ||
| if tsfreq.nanos > freq: # upsample | ||
| ts = ts.resample(fmins).asfreq().ffill() | ||
| elif tsfreq.nanos < freq: # downsample | ||
| ts = ts.resample(fmins).mean() | ||
| return ts.truncate(start, stop).to_numpy() | ||
|
|
||
|
|
||
|
|
@@ -339,16 +344,19 @@ def dayval(siminfo, monthly): | |
| interpolation to day, but constant within day""" | ||
| start = siminfo["start"] | ||
| stop = siminfo["stop"] | ||
| freq = Minute(siminfo["delt"]) | ||
| fmins = Minute(siminfo["delt"]) | ||
| freq = fmins.nanos | ||
|
|
||
| months = tile(monthly, stop.year - start.year + 1).astype(float) | ||
| dr = date_range(start=f"{start.year}-01-01", end=f"{stop.year}-12-31", freq="MS") | ||
| ts = Series(months, index=dr).resample("D").interpolate("time") | ||
| tsfreq = ts.index.freq | ||
|
|
||
|
|
||
| if ts.index.freq > freq: # upsample | ||
| ts = ts.resample(freq).ffill() | ||
| elif ts.index.freq < freq: # downsample | ||
| ts = ts.resample(freq).mean() | ||
| if tsfreq.nanos > freq: # upsample | ||
| ts = ts.resample(fmins).ffill() | ||
| elif tsfreq.nanos < freq: # downsample | ||
| ts = ts.resample(fmins).mean() | ||
| return ts.truncate(start, stop).to_numpy() | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@timcera @PaulDudaRESPEC can we get a couple more sets of eyes on this file? Robert and I talked about this and I left some notes/questions/comments/concerns in #216. I'm not sure why it's in this pandas3 prep PR, maybe Robert can chime in about how it relates to pandas 3 or maybe we should move it to its own PR if it it's important to Robert. Moving this to its own PR could help us all collaborate and understand the need for this file, why it's a good example, and then how to best share/communicate the example to the public in our repo.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@austinorr @timcera @PaulDudaRESPEC
It's a testing file. I know it is useful for development, and having sample dev code synchronously in git record with the developments that are happening IS actually a best practice IMO.
Especially since we are firmly in the development stage, not release stage, trying to eliminate every file that conflicts with goals of directory tree purity seems onerous and discourages development.