Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
c5b2a1c
Add process_groups function
ptomasula Feb 19, 2021
152d7ed
Merge branch 'develop' into develop_readWDM
aufdenkampe Feb 22, 2021
90e7ff7
Adding rpo772.wdm
ptomasula Feb 22, 2021
bad3771
Numpy Array chunk allocation approach
ptomasula Feb 22, 2021
129a228
End of block on 511th element of record
ptomasula Feb 22, 2021
762b90c
Add WDM Programmers Guide
aufdenkampe Feb 22, 2021
b6fbef4
Adding rpo772.wdm debugging files
aufdenkampe Feb 22, 2021
0f5a2d1
alternative implementation of process_group:
htaolimno Mar 1, 2021
b600ebf
remove deprecated functions
ptomasula Mar 2, 2021
2561869
renaming internal functions
ptomasula Mar 2, 2021
2e205f1
tidy up readWDM
ptomasula Mar 3, 2021
8b701e9
Searchable WDMProgrammersGuide+Search+Nav.pdf with sidebar navigation
aufdenkampe Mar 4, 2021
ef2d8df
Upload HSPF v12.2 manual with added sidebar navigation
aufdenkampe Mar 5, 2021
0cfd7c2
Merge pull request #32 from LimnoTech/develop
bcous Mar 5, 2021
e5d64a1
Numba support
ptomasula Mar 5, 2021
677adaa
Merge branch 'develop_readWDM' of https://github.com/LimnoTech/HSPsqu…
ptomasula Mar 5, 2021
1a62f38
Explicit Int64 assignment for datetime
ptomasula Mar 9, 2021
1b52a17
numba compatible datetime converter
ptomasula Mar 9, 2021
9b19a92
Remove old datetime conversion
ptomasula Mar 12, 2021
2a8373e
Test10 no longer runs since readWDM time series updates
aufdenkampe Mar 30, 2021
2014cd0
Datetime shift fix
ptomasula Mar 30, 2021
b0edc39
Remove sparse timeseries fill
ptomasula Mar 31, 2021
a094cfe
Merge branch 'develop' into develop_readWDM
aufdenkampe Mar 31, 2021
0f1f7bc
Merge branch 'develop' into develop_readWDM
aufdenkampe Mar 31, 2021
c52eaad
TimeSeries dtypes change in readWDM
aufdenkampe Apr 8, 2021
2d09713
Update HDF5_compare notebook
aufdenkampe Apr 8, 2021
ca50dd0
Merge branch 'develop' into develop_readWDM
aufdenkampe Apr 9, 2021
543ea20
Stop datetime fix
ptomasula Apr 12, 2021
ae374f1
Adding missing lines to stop datetime fix
ptomasula Apr 12, 2021
0d910ed
Merge branch 'develop' into develop_readWDM
aufdenkampe Apr 14, 2021
6e6beab
Testing outputs. Fix worked!
aufdenkampe Apr 14, 2021
479e6d6
Sparse time series fill added back
aufdenkampe Apr 23, 2021
b9e635f
timeseries freq assignment
ptomasula Apr 23, 2021
1c450ae
Update conda env & notebooks
aufdenkampe Apr 23, 2021
ee429de
block control word indexing fix
ptomasula Apr 26, 2021
146036c
Error handling for irregular timeseries
ptomasula Apr 28, 2021
c62adb6
renaming private function
ptomasula Apr 28, 2021
5039cf2
Merge branch 'develop' into develop_readWDM2
ptomasula Apr 28, 2021
e651d08
rename bits_to_date
ptomasula Apr 28, 2021
4f4f15a
remove unnecessary dependencies
ptomasula Apr 28, 2021
12dc1d5
deprecation notices
ptomasula Apr 28, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion HSP2/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def transform(ts, name, how, siminfo):
if freq == tsfreq:
pass
elif tsfreq == None: # Sparse time base, frequency not defined
ts = ts.reindex(siminfo['tbase']).ffill().bfill()
ts = ts.reindex(siminfo['tbase']).ffill().bfill()
elif how == 'SAME':
ts = ts.resample(freq).ffill() # tsfreq >= freq assumed, or bad user choice
elif not how:
Expand Down
7 changes: 7 additions & 0 deletions HSP2tools/.vs/VSWorkspaceState.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"ExpandedNodes": [
""
],
"SelectedNode": "\\readWDM.py",
"PreviewInSolutionExplorer": false
}
256 changes: 208 additions & 48 deletions HSP2tools/readWDM.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

import numpy as np
import pandas as pd
from numba import jit
from numba import jit, njit
import datetime

import warnings

# look up attributes NAME, data type (Integer; Real; String) and data length by attribute number
attrinfo = {1:('TSTYPE','S',4), 2:('STAID','S',16), 11:('DAREA','R',1),
17:('TCODE','I',1), 27:('TSBYR','I',1), 28:('TSBMO','I',1),
Expand All @@ -24,13 +26,20 @@
freq = {7:'100YS', 6:'YS', 5:'MS', 4:'D', 3:'H', 2:'min', 1:'S'} # pandas date_range() frequency by TCODE, TGROUP


def readWDM(wdmfile, hdffile, jupyterlab=True):
def readWDM(wdmfile, hdffile, compress_output=False):
iarray = np.fromfile(wdmfile, dtype=np.int32)
farray = np.fromfile(wdmfile, dtype=np.float32)

date_epoch = np.datetime64(0,'Y')
dt_year = np.timedelta64(1, 'Y')
dt_month = np.timedelta64(1, 'M')
dt_day = np.timedelta64(1, 'D')
dt_hour = np.timedelta64(1, 'h')
dt_minute = np.timedelta64(1, 'm')
dt_second = np.timedelta64(1, 's')

if iarray[0] != -998:
print('Not a WDM file, magic number is not -990. Stopping!')
return
raise ValueError ('Provided file does not match WDM format. First int32 should be -998.')
nrecords = iarray[28] # first record is File Definition Record
ntimeseries = iarray[31]

Expand All @@ -39,7 +48,7 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
if not (iarray[index]==0 and iarray[index+1]==0 and iarray[index+2]==0 and iarray[index+3]==0) and iarray[index+5]==1:
dsnlist.append(index)
if len(dsnlist) != ntimeseries:
print('PROGRAM ERROR, wrong number of DSN records found')
raise RuntimeError (f'Wrong number of Time Series Records found expecting:{ntimeseries} found:{len(dsnlist)}')

with pd.HDFStore(hdffile) as store:
summary = []
Expand All @@ -66,7 +75,7 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
elif atype == 'R':
dattr[name] = farray[ptr]
else:
dattr[name] = ''.join([itostr(iarray[k]) for k in range(ptr, ptr + length // 4)]).strip()
dattr[name] = ''.join([_inttostr(iarray[k]) for k in range(ptr, ptr + length // 4)]).strip()
if att not in dattr:
found_in_all = False
if found_in_all:
Expand Down Expand Up @@ -98,50 +107,49 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
elif atype == 'R':
dattr[name] = farray[ptr]
else:
dattr[name] = ''.join([itostr(iarray[k]) for k in range(ptr, ptr + length//4)]).strip()
dattr[name] = ''.join([_inttostr(iarray[k]) for k in range(ptr, ptr + length//4)]).strip()

# Get timeseries timebase data
records = []
records = []
offsets = []
for i in range(pdat+1, pdatv-1):
a = iarray[index+i]
if a != 0:
records.append(splitposition(a))
record, offset = _splitposition(a)
records.append(record)
offsets.append(offset)
if len(records) == 0:
continue # WDM preallocation, but nothing saved here yet

srec, soffset = records[0]
start = splitdate(iarray[srec*512 + soffset])

sprec, spoffset = splitposition(frepos)
finalindex = sprec * 512 + spoffset
continue

# calculate number of data points in each group, tindex is final index for storage
tgroup = dattr['TGROUP']
tstep = dattr['TSSTEP']
tcode = dattr['TCODE']
cindex = pd.date_range(start=start, periods=len(records)+1, freq=freq[tgroup])
tindex = pd.date_range(start=start, end=cindex[-1], freq=str(tstep) + freq[tcode])
counts = np.diff(np.searchsorted(tindex, cindex))

## Get timeseries data
floats = np.zeros(sum(counts), dtype=np.float32)
findex = 0
for (rec,offset),count in zip(records, counts):
findex = getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tcode, tstep)

## Write to HDF5 file
series = pd.Series(floats[:findex], index=tindex[:findex])

records = np.asarray(records)
offsets = np.asarray(offsets)

dates, values, stop_datetime = _process_groups(iarray, farray, records, offsets, tgroup)
stop_datetime = datetime.datetime(*_bits_to_date(stop_datetime))
dates = np.array(dates)
dates_converted = _date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second)
series = pd.Series(values, index=dates_converted)
try:
series.index.freq = str(tstep) + freq[tcode]
except ValueError:
series.index.freq = None

dsname = f'TIMESERIES/TS{dsn:03d}'
# series.to_hdf(store, dsname, complib='blosc', complevel=9)
if jupyterlab:
series.to_hdf(store, dsname, complib='blosc', complevel=9) # This is the official version
if compress_output:
series.to_hdf(store, dsname, complib='blosc', complevel=9)
else:
series.to_hdf(store, dsname, format='t', data_columns=True) # show the columns in HDFView
series.to_hdf(store, dsname, format='t', data_columns=True)

data = [str(tindex[0]), str(tindex[-1]), str(tstep) + freq[tcode],
len(series), dattr['TSTYPE'], dattr['TFILL']]
data = [
str(series.index[0]), str(stop_datetime), str(tstep) + freq[tcode],
len(series), dattr['TSTYPE'], dattr['TFILL']
]
columns = ['Start', 'Stop', 'Freq','Length', 'TSTYPE', 'TFILL']
# search = ['STAID', 'STNAM', 'SCENARIO', 'CONSTITUENT','LOCATION']
for x in columns_to_add:
if x in dattr:
data.append(dattr[x])
Expand All @@ -150,11 +158,165 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
summary.append(data)
summaryindx.append(dsname[11:])


dfsummary = pd.DataFrame(summary, index=summaryindx, columns=columns)
store.put('TIMESERIES/SUMMARY',dfsummary, format='t', data_columns=True)

return dfsummary

@njit
def _splitdate(x):
year = np.int64(x >> 14)
month = np.int64(x >> 10 & 0xF)
day = np.int64(x >> 5 & 0x1F)
hour = np.int64(x & 0x1F)
return _correct_date(year, month, day, hour, 0,0)

@njit
def _splitcontrol(x):
nval = x >> 16
ltstep = x >> 10 & 0x3f
ltcode = x >> 7 & 0x7
comp = x >> 5 & 0x3
qual = x & 0x1f
return nval, ltstep, ltcode, comp, qual

@njit
def _splitposition(x):
return((x>>9) - 1, (x&0x1FF) - 1) #args: record, offset

@njit
def _inttostr(i):
return chr(i & 0xFF) + chr(i>>8 & 0xFF) + chr(i>>16 & 0xFF) + chr(i>>24 & 0xFF)

@njit
def _bits_to_date(x):
year = x >> 26
month = x >> 22 & 0xf
day = x >> 17 & 0x1f
hour = x >> 12 & 0x1f
minute = x >> 6 & 0x3f
second = x & 0x3f
return year, month, day, hour, minute, second

@njit
def _date_to_bits(year, month, day, hour, minute, second):
x = year << 26 | month << 22 | day << 17 | hour << 12 | minute << 6 | second
return x

@njit
def _increment_date(date, timecode, timestep):
year, month, day, hour, minute, second = _bits_to_date(date)

if timecode == 7: year += 100 * timestep
elif timecode == 6 : year += timestep
elif timecode == 5 : month += timestep
elif timecode == 4 : day += timestep
elif timecode == 3 : hour += timestep
elif timecode == 2 : minute += timestep
elif timecode == 1 : second += timestep

return _correct_date(year, month, day, hour, minute, second)

@njit
def _correct_date(year, month, day, hour, minute, second):
while second >= 60:
second -= 60
minute += 1
while minute >= 60:
minute -= 60
hour += 1
while hour >= 24:
hour -= 24
day += 1
while day > _days_in_month(year, month):
day -= _days_in_month(year, month)
month += 1
while month > 12:
month -= 12
year += 1
return _date_to_bits(year, month, day, hour, minute, second)

@njit
def _days_in_month(year, month):
if month > 12: month %= 12

if month in (1,3,5,7,8,10,12):
return 31
elif month in (4,6,9,11):
return 30
elif month == 2:
if _is_leapyear(year): return 29
else: return 28

@njit
def _is_leapyear(year):
if year % 400 == 0:
return True
if year % 100 == 0:
return False
if year % 4 == 0:
return True
else:
return False

@njit
def _date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second):
converted_dates = []
for x in dates:
year, month, day, hour, minute, second = _bits_to_date(x)
date = date_epoch
date += (year - 1970) * dt_year
date += (month - 1) * dt_month
date += (day - 1) * dt_day
date += hour * dt_hour
date += minute * dt_minute
date += second * dt_second
converted_dates.append(date)
return converted_dates

@njit
def _process_groups(iarray, farray, records, offsets, tgroup):
date_array = [0] #need initialize with a type for numba
value_array = [0.0]

for i in range(0,len(records)):
record = records[i]
offset = offsets[i]
index = record * 512 + offset
pscfwr = iarray[record * 512 + 3] #should be 0 for last record in timeseries
current_date = _splitdate(iarray[index])
group_enddate = _increment_date(current_date, tgroup, 1)
offset +=1
index +=1

while current_date < group_enddate:
nval, ltstep, ltcode, comp, qual = _splitcontrol(iarray[index])
#compressed - only has single value which applies to full range
if comp == 1:
for i in range(0, nval, 1):
date_array.append(current_date)
current_date = _increment_date(current_date, ltcode, ltstep)
value_array.append(farray[index + 1])
index += 2
offset +=2
else:
for i in range(0, nval, 1):
date_array.append(current_date)
current_date = _increment_date(current_date, ltcode, ltstep)
value_array.append(farray[index + 1 + i])
index += 1 + nval
offset +=1 + nval

if offset >= 511:
offset = 4
index = (pscfwr - 1) * 512 + offset
record = pscfwr
pscfwr = iarray[(record - 1) * 512 + 3] #should be 0 for last record in timeseries

date_array = date_array[1:]
value_array = value_array[1:]

return date_array, value_array, group_enddate

'''
Get single time series data from a WDM file
Expand Down Expand Up @@ -289,41 +451,39 @@ def get_wdm_data_set(wdmfile, attributes):

return None

########################
### legacy functions ###
########################

def todatetime(yr=1900, mo=1, dy=1, hr=0):
'''takes yr,mo,dy,hr information then returns its datetime64'''
warnings.warn("use '_date_convert' instead; Removed for numba compatible datetime handling; reference commit 1b52a1736e45a497ccdf78cd6e2eab8c0b7a444f", DeprecationWarning)
if hr == 24:
return datetime.datetime(yr, mo, dy, 23) + pd.Timedelta(1,'h')
else:
return datetime.datetime(yr, mo, dy, hr)

def splitdate(x):
'''splits WDM int32 DATWRD into year, month, day, hour -> then returns its datetime64'''
warnings.warn("use '_splitdate' instead; naming updated to indicate internal function", DeprecationWarning)
return todatetime(x >> 14, x >> 10 & 0xF, x >> 5 & 0x1F, x & 0x1F) # args: year, month, day, hour

def splitcontrol(x):
''' splits int32 into (qual, compcode, units, tstep, nvalues)'''
warnings.warn("use '_splitcontrol' instead; naming updated to indicate internal function", DeprecationWarning)
return(x & 0x1F, x >> 5 & 0x3, x >> 7 & 0x7, x >> 10 & 0x3F, x >> 16)

def splitposition(x):
''' splits int32 into (record, offset), converting to Pyton zero based indexing'''
warnings.warn("use '_spiltposition' instead; naming updated to indicate internal function", DeprecationWarning)
return((x>>9) - 1, (x&0x1FF) - 1)

def itostr(i):
warnings.warn("use '_inttostr' instead; naming updated to indicate internal function; method also handles integer argments so updated name from 'i' to 'int' for additonal clarity", DeprecationWarning)
return chr(i & 0xFF) + chr(i>>8 & 0xFF) + chr(i>>16 & 0xFF) + chr(i>>24 & 0xFF)

# @jit(nopython=True, cache=True)
def leap_year(y):
if y % 400 == 0:
return True
if y % 100 == 0:
return False
if y % 4 == 0:
return True
else:
return False

def getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tcode, tstep):
warnings.warn("discontinue use and replace with new 'process_groups' instead; Function replaced by incompatible group/block processing approach; reference commit c5b2a1cdd6a55eccc0db67d7840ec3eaf904dcec .",DeprecationWarning)
index = rec * 512 + offset + 1
stop = (rec + 1) * 512
cntr = 0
Expand Down Expand Up @@ -376,6 +536,7 @@ def getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tc
return findex

def adjustNval(ldate, ltstep, tstep, ltcode, tcode, comp, nval):
warnings.warn("supporting function for deprecated 'get_floats' function;", DeprecationWarning)
lnval = nval
if comp != 1:
nval = -1 # only can adjust compressed data
Expand Down Expand Up @@ -418,4 +579,3 @@ def adjustNval(ldate, ltstep, tstep, ltcode, tcode, comp, nval):
str(nval) + ',', str(lnval), ')')

return nval

Binary file added docs/HSPF_v12.2_manual+nav.docx
Binary file not shown.
Binary file added docs/HSPF_v12.2_manual+nav.pdf
Binary file not shown.
Binary file added docs/WDMProgrammersGuide+Search+Nav.pdf
Binary file not shown.
Binary file added docs/WDMProgrammersGuide.pdf
Binary file not shown.
5 changes: 5 additions & 0 deletions environment_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ dependencies:
# - texlive-core # to export notebooks to PDF. https://nbconvert.readthedocs.io/en/latest/install.html#installing-tex
- matplotlib

# Dev tools (optional)
# - python-language-server
- jupyter-lsp-python # Includes both the server extension (jupyter-lsp) and pyls third-party server (python-language-server)
- jupyterlab-lsp # Docs at https://github.com/krassowski/jupyterlab-lsp

# package management
- conda
- conda-build
Expand Down
Loading