LimnoTech · ptomasula · Apr 28, 2021 · Feb 19, 2021 · Feb 22, 2021 · Feb 22, 2021
diff --git a/HSP2/utilities.py b/HSP2/utilities.py
@@ -77,7 +77,7 @@ def transform(ts, name, how, siminfo):
     if freq == tsfreq:
         pass
     elif tsfreq == None:     # Sparse time base, frequency not defined
-        ts = ts.reindex(siminfo['tbase']).ffill().bfill()
+         ts = ts.reindex(siminfo['tbase']).ffill().bfill()
     elif how == 'SAME':
         ts = ts.resample(freq).ffill()  # tsfreq >= freq assumed, or bad user choice
     elif not how:

diff --git a/HSP2tools/.vs/VSWorkspaceState.json b/HSP2tools/.vs/VSWorkspaceState.json
@@ -0,0 +1,7 @@
+{
+  "ExpandedNodes": [
+    ""
+  ],
+  "SelectedNode": "\\readWDM.py",
+  "PreviewInSolutionExplorer": false
+}
diff --git a/HSP2tools/readWDM.py b/HSP2tools/readWDM.py
@@ -7,9 +7,11 @@
 
 import numpy as np
 import pandas as pd
-from numba import jit
+from numba import jit, njit
 import datetime
 
+import warnings
+
 # look up attributes NAME, data type (Integer; Real; String) and data length by attribute number
 attrinfo = {1:('TSTYPE','S',4),     2:('STAID','S',16),    11:('DAREA','R',1),
            17:('TCODE','I',1),     27:('TSBYR','I',1),     28:('TSBMO','I',1),
@@ -24,13 +26,20 @@
 freq = {7:'100YS', 6:'YS', 5:'MS', 4:'D', 3:'H', 2:'min', 1:'S'}   # pandas date_range() frequency by TCODE, TGROUP
 
 
-def readWDM(wdmfile, hdffile, jupyterlab=True):
+def readWDM(wdmfile, hdffile, compress_output=False):
     iarray = np.fromfile(wdmfile, dtype=np.int32)
     farray = np.fromfile(wdmfile, dtype=np.float32)
 
+    date_epoch = np.datetime64(0,'Y')
+    dt_year = np.timedelta64(1, 'Y')
+    dt_month = np.timedelta64(1, 'M')
+    dt_day = np.timedelta64(1, 'D')
+    dt_hour = np.timedelta64(1, 'h')
+    dt_minute = np.timedelta64(1, 'm')
+    dt_second = np.timedelta64(1, 's')
+
     if iarray[0] != -998:
-        print('Not a WDM file, magic number is not -990. Stopping!')
-        return
+        raise ValueError ('Provided file does not match WDM format. First int32 should be -998.')
     nrecords    = iarray[28]    # first record is File Definition Record
     ntimeseries = iarray[31]
 
@@ -39,7 +48,7 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
         if not (iarray[index]==0 and iarray[index+1]==0 and iarray[index+2]==0 and iarray[index+3]==0) and iarray[index+5]==1:
             dsnlist.append(index)
     if len(dsnlist) != ntimeseries:
-        print('PROGRAM ERROR, wrong number of DSN records found')
+        raise RuntimeError (f'Wrong number of Time Series Records found expecting:{ntimeseries} found:{len(dsnlist)}')
 
     with pd.HDFStore(hdffile) as store:
         summary = []
@@ -66,7 +75,7 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
                     elif atype == 'R':
                         dattr[name] = farray[ptr]
                     else:
-                        dattr[name] = ''.join([itostr(iarray[k]) for k in range(ptr, ptr + length // 4)]).strip()
+                        dattr[name] = ''.join([_inttostr(iarray[k]) for k in range(ptr, ptr + length // 4)]).strip()
                 if att not in dattr:
                     found_in_all = False
             if found_in_all:
@@ -98,50 +107,49 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
                 elif atype == 'R':
                     dattr[name] = farray[ptr]
                 else:
-                    dattr[name] = ''.join([itostr(iarray[k]) for k in range(ptr, ptr + length//4)]).strip()
+                    dattr[name] = ''.join([_inttostr(iarray[k]) for k in range(ptr, ptr + length//4)]).strip()
 
             # Get timeseries timebase data
-            records = []
+            records = [] 
+            offsets = []
             for i in range(pdat+1, pdatv-1):
                 a = iarray[index+i]
                 if a != 0:
-                    records.append(splitposition(a))
+                    record, offset = _splitposition(a)
+                    records.append(record)
+                    offsets.append(offset)
             if len(records) == 0:
-                continue   # WDM preallocation, but nothing saved here yet
-
-            srec, soffset = records[0]
-            start = splitdate(iarray[srec*512 + soffset])
-
-            sprec, spoffset = splitposition(frepos)
-            finalindex = sprec * 512 + spoffset
+                continue   
 
             # calculate number of data points in each group, tindex is final index for storage
             tgroup = dattr['TGROUP']
             tstep  = dattr['TSSTEP']
             tcode  = dattr['TCODE']
-            cindex = pd.date_range(start=start, periods=len(records)+1, freq=freq[tgroup])
-            tindex = pd.date_range(start=start, end=cindex[-1], freq=str(tstep) + freq[tcode])
-            counts = np.diff(np.searchsorted(tindex, cindex))
-
-            ## Get timeseries data
-            floats = np.zeros(sum(counts),  dtype=np.float32)
-            findex = 0
-            for (rec,offset),count in zip(records, counts):
-                findex = getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tcode, tstep)
-
-            ## Write to HDF5 file
-            series = pd.Series(floats[:findex], index=tindex[:findex])
+
+            records = np.asarray(records)
+            offsets = np.asarray(offsets)
+
+            dates, values, stop_datetime = _process_groups(iarray, farray, records, offsets, tgroup)
+            stop_datetime = datetime.datetime(*_bits_to_date(stop_datetime))
+            dates = np.array(dates)
+            dates_converted = _date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second)
+            series = pd.Series(values, index=dates_converted)
+            try:
+                series.index.freq = str(tstep) + freq[tcode] 
+            except ValueError:
+                series.index.freq = None
+
             dsname = f'TIMESERIES/TS{dsn:03d}'
-            # series.to_hdf(store, dsname, complib='blosc', complevel=9)
-            if jupyterlab:
-                series.to_hdf(store, dsname, complib='blosc', complevel=9)  # This is the official version
+            if compress_output:
+                series.to_hdf(store, dsname, complib='blosc', complevel=9)  
             else:
-                series.to_hdf(store, dsname, format='t', data_columns=True)  # show the columns in HDFView
+                series.to_hdf(store, dsname, format='t', data_columns=True)
 
-            data = [str(tindex[0]), str(tindex[-1]), str(tstep) + freq[tcode],
-             len(series),  dattr['TSTYPE'], dattr['TFILL']]
+            data = [
+                str(series.index[0]), str(stop_datetime), str(tstep) + freq[tcode],
+                len(series),  dattr['TSTYPE'], dattr['TFILL']
+                ]
             columns = ['Start', 'Stop', 'Freq','Length', 'TSTYPE', 'TFILL']
-            # search = ['STAID', 'STNAM', 'SCENARIO', 'CONSTITUENT','LOCATION']
             for x in columns_to_add:
                 if x in dattr:
                     data.append(dattr[x])
@@ -150,11 +158,165 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
             summary.append(data)
             summaryindx.append(dsname[11:])
 
-
         dfsummary = pd.DataFrame(summary, index=summaryindx, columns=columns)
         store.put('TIMESERIES/SUMMARY',dfsummary, format='t', data_columns=True)
+
     return dfsummary
 
+@njit 
+def _splitdate(x):
+    year = np.int64(x >> 14)
+    month = np.int64(x >> 10 & 0xF)
+    day = np.int64(x >> 5 & 0x1F)
+    hour = np.int64(x & 0x1F)
+    return _correct_date(year, month, day, hour, 0,0)
+
+@njit 
+def _splitcontrol(x):
+    nval = x >> 16
+    ltstep = x >> 10 & 0x3f 
+    ltcode = x >> 7 & 0x7
+    comp = x >> 5 & 0x3
+    qual  = x & 0x1f
+    return nval, ltstep, ltcode, comp, qual
+
+@njit 
+def _splitposition(x):
+    return((x>>9) - 1, (x&0x1FF) - 1) #args: record, offset
+
+@njit 
+def _inttostr(i):
+    return chr(i & 0xFF) + chr(i>>8 & 0xFF) + chr(i>>16 & 0xFF) + chr(i>>24 & 0xFF)
+
+@njit 
+def _bits_to_date(x):
+    year = x >> 26
+    month = x >> 22 & 0xf
+    day = x >> 17 & 0x1f
+    hour = x >> 12 & 0x1f
+    minute = x >> 6 & 0x3f
+    second = x & 0x3f
+    return year, month, day, hour, minute, second
+
+@njit 
+def _date_to_bits(year, month, day, hour, minute, second):
+    x = year << 26 | month << 22 | day << 17 | hour << 12 | minute << 6 | second 
+    return x
+
+@njit 
+def _increment_date(date, timecode, timestep):
+    year, month, day, hour, minute, second = _bits_to_date(date)
+
+    if timecode == 7: year += 100 * timestep
+    elif timecode == 6 : year += timestep
+    elif timecode == 5 : month += timestep
+    elif timecode == 4 : day += timestep
+    elif timecode == 3 : hour += timestep
+    elif timecode == 2 : minute += timestep
+    elif timecode == 1 : second += timestep
+
+    return _correct_date(year, month, day, hour, minute, second)
+
+@njit 
+def _correct_date(year, month, day, hour, minute, second):
+    while second >= 60:
+        second -= 60
+        minute += 1
+    while minute >= 60:
+        minute -= 60
+        hour += 1
+    while hour >= 24:
+        hour -= 24
+        day += 1
+    while day > _days_in_month(year, month):
+        day -= _days_in_month(year, month)
+        month += 1
+    while month > 12:
+        month -= 12
+        year += 1
+    return _date_to_bits(year, month, day, hour, minute, second)
+
+@njit 
+def _days_in_month(year, month):
+    if month > 12: month %= 12
+
+    if month in (1,3,5,7,8,10,12):
+        return 31
+    elif month in (4,6,9,11):
+        return 30
+    elif month == 2:
+        if _is_leapyear(year): return 29
+        else: return 28
+
+@njit 
+def _is_leapyear(year):
+    if year % 400 == 0:
+        return True
+    if year % 100 == 0:
+        return False
+    if year % 4 == 0:
+        return True
+    else:
+        return False
+
+@njit
+def _date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second):
+    converted_dates = []
+    for x in dates:
+        year, month, day, hour, minute, second = _bits_to_date(x)
+        date = date_epoch
+        date += (year - 1970) * dt_year
+        date += (month - 1) * dt_month
+        date += (day - 1) * dt_day
+        date += hour * dt_hour
+        date += minute * dt_minute
+        date += second * dt_second
+        converted_dates.append(date)
+    return converted_dates
+
+@njit
+def _process_groups(iarray, farray, records, offsets, tgroup):
+    date_array = [0] #need initialize with a type for numba
+    value_array = [0.0]
+
+    for i in range(0,len(records)):
+        record = records[i]
+        offset = offsets[i]
+        index = record * 512 + offset
+        pscfwr = iarray[record * 512 + 3] #should be 0 for last record in timeseries 
+        current_date = _splitdate(iarray[index])
+        group_enddate = _increment_date(current_date, tgroup, 1)
+        offset +=1
+        index +=1
+
+        while current_date < group_enddate:
+            nval, ltstep, ltcode, comp, qual = _splitcontrol(iarray[index])  
+            #compressed - only has single value which applies to full range
+            if comp == 1:
+                for i in range(0, nval, 1):
+                    date_array.append(current_date)
+                    current_date = _increment_date(current_date, ltcode, ltstep) 
+                    value_array.append(farray[index + 1])
+                index += 2
+                offset +=2
+            else:
+                for i in range(0, nval, 1):
+                    date_array.append(current_date)
+                    current_date = _increment_date(current_date, ltcode, ltstep) 
+                    value_array.append(farray[index + 1 + i])
+                index += 1 + nval
+                offset +=1 + nval
+
+            if offset >= 511:
+                offset = 4
+                index = (pscfwr - 1) * 512 + offset
+                record = pscfwr
+                pscfwr = iarray[(record - 1) * 512 + 3] #should be 0 for last record in timeseries
+
+    date_array = date_array[1:]
+    value_array = value_array[1:]
+
+    return date_array, value_array, group_enddate
 
 '''
 Get single time series data from a WDM file
@@ -289,41 +451,39 @@ def get_wdm_data_set(wdmfile, attributes):
 
     return None
 
+########################
+### legacy functions ###
+########################
 
 def todatetime(yr=1900, mo=1, dy=1, hr=0):
     '''takes yr,mo,dy,hr information then returns its datetime64'''
+    warnings.warn("use '_date_convert' instead; Removed for numba compatible datetime handling; reference commit 1b52a1736e45a497ccdf78cd6e2eab8c0b7a444f", DeprecationWarning)
     if hr == 24:
         return datetime.datetime(yr, mo, dy, 23) + pd.Timedelta(1,'h')
     else:
         return datetime.datetime(yr, mo, dy, hr)
 
 def splitdate(x):
     '''splits WDM int32 DATWRD into year, month, day, hour -> then returns its datetime64'''
+    warnings.warn("use '_splitdate' instead; naming updated to indicate internal function", DeprecationWarning)
     return todatetime(x >> 14, x >> 10 & 0xF, x >> 5 & 0x1F, x & 0x1F) # args: year, month, day, hour
 
 def splitcontrol(x):
     ''' splits int32 into (qual, compcode, units, tstep, nvalues)'''
+    warnings.warn("use '_splitcontrol' instead; naming updated to indicate internal function", DeprecationWarning)
     return(x & 0x1F, x >> 5 & 0x3, x >> 7 & 0x7, x >> 10 & 0x3F, x >> 16)
 
 def splitposition(x):
     ''' splits int32 into (record, offset), converting to Pyton zero based indexing'''
+    warnings.warn("use '_spiltposition' instead; naming updated to indicate internal function", DeprecationWarning)
     return((x>>9) - 1, (x&0x1FF) - 1)
 
 def itostr(i):
+    warnings.warn("use '_inttostr' instead; naming updated to indicate internal function; method also handles integer argments so updated name from 'i' to 'int' for additonal clarity", DeprecationWarning)
     return chr(i & 0xFF) + chr(i>>8 & 0xFF) + chr(i>>16 & 0xFF) + chr(i>>24 & 0xFF)
 
-# @jit(nopython=True, cache=True)
-def leap_year(y):
-    if y % 400 == 0:
-        return True
-    if y % 100 == 0:
-        return False
-    if y % 4 == 0:
-        return True
-    else:
-        return False
-
 def getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tcode, tstep):
+    warnings.warn("discontinue use and replace with new 'process_groups' instead; Function replaced by incompatible group/block processing approach; reference commit c5b2a1cdd6a55eccc0db67d7840ec3eaf904dcec .",DeprecationWarning)
     index = rec * 512 + offset + 1
     stop = (rec + 1) * 512
     cntr = 0
@@ -376,6 +536,7 @@ def getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tc
     return findex
 
 def adjustNval(ldate, ltstep, tstep, ltcode, tcode, comp, nval):
+    warnings.warn("supporting function for deprecated 'get_floats' function;", DeprecationWarning)
     lnval = nval
     if comp != 1:
         nval = -1  # only can adjust compressed data
@@ -418,4 +579,3 @@ def adjustNval(ldate, ltstep, tstep, ltcode, tcode, comp, nval):
               str(nval) + ',', str(lnval), ')')
 
     return nval
-
diff --git a/docs/HSPF_v12.2_manual+nav.docx b/docs/HSPF_v12.2_manual+nav.docx
diff --git a/docs/HSPF_v12.2_manual+nav.pdf b/docs/HSPF_v12.2_manual+nav.pdf
diff --git a/docs/WDMProgrammersGuide+Search+Nav.pdf b/docs/WDMProgrammersGuide+Search+Nav.pdf
diff --git a/docs/WDMProgrammersGuide.pdf b/docs/WDMProgrammersGuide.pdf
diff --git a/environment_dev.yml b/environment_dev.yml
@@ -20,6 +20,11 @@ dependencies:
   # - texlive-core  # to export notebooks to PDF. https://nbconvert.readthedocs.io/en/latest/install.html#installing-tex
   - matplotlib
 
+  # Dev tools (optional)
+  # - python-language-server
+  - jupyter-lsp-python  # Includes both the server extension (jupyter-lsp) and pyls third-party server (python-language-server)
+  - jupyterlab-lsp # Docs at https://github.com/krassowski/jupyterlab-lsp
+
   # package management
   - conda
   - conda-build