upstream_delineator/delineator_utils/consolidate.py (2 changes: 1 addition & 1 deletion)

@@ -27,7 +27,7 @@ def show_area_stats(G: nx.Graph) -> None:
     :param G:
     :return: None
     """
-    areas = [data['area'] for node, data in G.nodes(data=True)]
+    areas = [data['area'] for node, data in G.nodes(data=True) if 'area' in data]
 
     # Step 3: Calculate statistics using NumPy
     n = len(areas)
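The new guard matters because the comprehension otherwise raises KeyError on any node that lacks an 'area' attribute. A minimal sketch with toy data:

import networkx as nx

G = nx.Graph()
G.add_node(1, area=2.5)
G.add_node(2)  # hypothetical node with no 'area' attribute

# The unguarded comprehension raises KeyError: 'area' on node 2;
# the guarded version simply skips it.
areas = [data['area'] for node, data in G.nodes(data=True) if 'area' in data]
assert areas == [2.5]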
upstream_delineator/delineator_utils/delineate.py (41 changes: 26 additions & 15 deletions)

@@ -41,6 +41,7 @@
     save_network,
     validate,
     write_geodata,
+    ensure_int,
 )
 
 # Shapely throws a bunch of FutureWarnings. Safe to ignore for now, as long as we
@@ -243,7 +244,7 @@ def addnode(B: list, node_id):
     assert len(terminal_node_df) == 1, "Should only have one outlet per watershed"
     terminal_node_id = terminal_node_df.index[0]
     # The terminal comid is the unit catchment that contains (overlaps) the outlet point
-    terminal_comid = terminal_node_df['COMID'].iat[0]
+    terminal_comid = int(terminal_node_df['COMID'].iat[0])
 
     # Let upstream_comids be the list of unit catchments (and river reaches) that are in the basin
     upstream_comids = []
@@ -255,7 +256,7 @@
     # of the outlet, and therefore we cannot process them to get expected results.
     gage_list = gages_gdf.index.tolist()
     for id in gage_list:
-        comid = gages_gdf.at[id, 'COMID']
+        comid = int(gages_gdf.at[id, 'COMID'])
         if comid not in upstream_comids:
             gages_gdf.drop(id, inplace=True)
             raise Warning(f"The point with id = {id} is not contained in the watershed of the first point.")
@@ -294,11 +295,12 @@ def addnode(B: list, node_id):
 
     # For now, put the split polygon geometry into a field in `gages_gdf`
     gages_gdf['polygon'] = None
-    gages_gdf['polygon_area'] = 0
+    gages_gdf['polygon_area'] = 0.0
 
     # Iterate over the gages, and run `split_catchment()` for every gage
     for gage_id in gages_gdf.index:
-        comid = gages_gdf.at[gage_id, 'COMID']
+        gage_id = int(gage_id)
+        comid = int(gages_gdf.at[gage_id, 'COMID'])
         lat = gages_gdf.at[gage_id, 'lat']
         lng = gages_gdf.at[gage_id, 'lng']
         catchment_poly = subbasins_gdf.loc[comid, 'geometry']
@@ -319,7 +321,7 @@
     # Now, we no longer need the downstream portion of the terminal unit catchment
     # so remove its row from the subbasins GeoDataFrame
     subbasins_gdf = subbasins_gdf.drop(terminal_comid)
-    subbasins_gdf.at[terminal_node_id, 'nextdown'] = 0
+    subbasins_gdf.at[terminal_node_id, 'nextdown'] = int(0)
 
     # Create a NETWORK GRAPH of the river basin.
     if config.get("VERBOSE"): print("Creating Network GRAPH")
@@ -402,19 +404,20 @@ def addnode(B: list, node_id):
     subbasins_gdf.geometry = subbasins_gdf.geometry.apply(lambda p: close_holes(p, 0))
     subbasins_gdf.reset_index(inplace=True)
     subbasins_gdf.rename(columns={'target': 'comid'}, inplace=True)
+    subbasins_gdf['comid'] = subbasins_gdf['comid'].astype(int)
     subbasins_gdf.set_index('comid', inplace=True)
 
     # After the dissolve operation, no way to preserve correct information in column 'nextdown'
     # But it is present in the Graph, so update the GeoDataFrame subbasins_gdf with that information
     # Add column `nextdownid` and the stream orders based on data in the graph
-    subbasins_gdf['nextdown'] = 0
+    subbasins_gdf['nextdown'] = int(0)
 
     for idx in subbasins_gdf.index:
         try:
             nextdown = list(G.successors(idx))[0]
         except Exception:
             nextdown = 0
-        subbasins_gdf.at[idx, 'nextdown'] = nextdown
+        subbasins_gdf.at[idx, 'nextdown'] = int(nextdown)
 
     # Round all the areas to four decimals (just for appearances)
     subbasins_gdf['unitarea'] = subbasins_gdf['unitarea'].round(1)
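The astype(int) call and the int() wrappers above guard against pandas silently upcasting integer keys to floats or objects. A toy illustration (made-up frames, not from the diff) of how an integer column drifts:

import pandas as pd

# Concatenating a frame that has 'comid' with one that lacks it
# introduces NaN, which upcasts the whole column to float64.
merged = pd.concat([
    pd.DataFrame({'comid': [42]}),
    pd.DataFrame({'other': [1]}),
])
print(merged['comid'].dtype)                          # float64; 42 became 42.0
print(merged['comid'].dropna().astype(int).tolist())  # [42] again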
@@ -435,6 +438,7 @@ def addnode(B: list, node_id):
     myrivers_gdf = myrivers_gdf.dissolve(by="target", aggfunc=agg)
     myrivers_gdf.reset_index(inplace=True)
     myrivers_gdf.rename(columns={'target': 'comid'}, inplace=True)
+    myrivers_gdf['comid'] = myrivers_gdf['comid'].astype(int)
     myrivers_gdf.set_index('comid', inplace=True)
 
     # We can now delete the river reach segment that belonged to the downstream portion of the
@@ -446,11 +450,12 @@
 
     # Add the fields `nextdown` and the stream orders to the rivers.
     for idx in myrivers_gdf.index:
+        idx = int(idx)
         try:
             nextdown = list(G.successors(idx))[0]
         except Exception:
             nextdown = 0
-        myrivers_gdf.at[idx, 'nextdown'] = nextdown
+        myrivers_gdf.at[idx, 'nextdown'] = int(nextdown)
         myrivers_gdf.at[idx, 'strahler_order'] = G.nodes[idx]['strahler_order']
         myrivers_gdf.at[idx, 'shreve_order'] = G.nodes[idx]['shreve_order']

@@ -460,6 +465,7 @@
     subbasins_gdf['shreve_order'] = 0
 
     for idx in subbasins_gdf.index:
+        idx = int(idx)
         subbasins_gdf.at[idx, 'strahler_order'] = G.nodes[idx]['strahler_order']
         subbasins_gdf.at[idx, 'shreve_order'] = G.nodes[idx]['shreve_order']

@@ -478,8 +484,10 @@
 
     subbasins_gdf.reset_index(inplace=True)
     subbasins_gdf.rename(columns={'index': 'comid'}, inplace=True)
+    subbasins_gdf['comid'] = subbasins_gdf['comid'].astype(int)
     myrivers_gdf.reset_index(inplace=True)
     myrivers_gdf.rename(columns={'index': 'comid'}, inplace=True)
+    myrivers_gdf['comid'] = myrivers_gdf['comid'].astype(int)
 
     # The rivers data will no longer be accurate, so drop these columns
     cols = ["lengthdir", "sinuosity", "slope", "uparea", "order", "strmDrop_t", "slope_taud", "NextDownID",
@@ -527,7 +535,7 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd
     selected_rows.drop(columns=['COMID'], inplace=True)
 
     # Add the column `nextdown` to our temporary GeoDataFrame selected_rows to prevent type problems below
-    selected_rows['nextdown'] = -999
+    selected_rows['nextdown'] = int(-999)
 
     # Here is where we add the newly-created split catchments corresponding to our outlet points.
     subbasins_gdf = pd.concat([subbasins_gdf, selected_rows])
@@ -537,9 +545,9 @@
     # Add the column `nextdown`; we will populate it below
 
     for node, comid in new_nodes.items():
-        rows = subbasins_gdf['nextdown'] == comid
-        subbasins_gdf.loc[rows, 'nextdown'] = node
-        subbasins_gdf.at[node, 'nextdown'] = comid
+        rows = subbasins_gdf['nextdown'] == int(comid)
+        subbasins_gdf.loc[rows, 'nextdown'] = int(node)
+        subbasins_gdf.at[node, 'nextdown'] = int(comid)
 
         # Subtract the polygon geometry of the split catchment from its parent unit catchment
         comid_poly = subbasins_gdf.loc[comid, 'geometry']
@@ -574,7 +582,7 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd
     # This is the same as largest area to smallest area
     gages_set.sort_values(by='unitarea', inplace=True)
     # This one-liner fills in the correct network connection information for nested outlets
-    gages_set['nextdown'] = gages_set.index.to_series().shift(-1).fillna(comid)
+    gages_set['nextdown'] = gages_set.index.to_series().shift(-1).fillna(int(comid))
 
     gages_set.sort_values(by='unitarea', ascending=False, inplace=True)
     subbasins_gdf = pd.concat([subbasins_gdf, gages_set])
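The shift(-1)/fillna one-liner chains nested gages: after sorting, each gage's 'nextdown' becomes the next gage in the list, and the last gage falls back to the parent catchment. A toy run with hypothetical ids:

import pandas as pd

gages_set = pd.DataFrame(index=[101, 102, 103])  # nested gages, sorted by area
comid = 77                                       # parent unit catchment
gages_set['nextdown'] = gages_set.index.to_series().shift(-1).fillna(int(comid))
print(gages_set['nextdown'].tolist())  # [102.0, 103.0, 77.0]; note shift() upcasts to float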
@@ -633,10 +641,10 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd
 
     # After the last nested gage, we need to fix the connection info for
     # any rows that previously had the unit catchment with comid as its 'nextdown'
-    rows = subbasins_gdf['nextdown'] == comid
+    rows = subbasins_gdf['nextdown'] == int(comid)
     indices = list(rows.index[rows])
     indices.remove(first_node)
-    subbasins_gdf.loc[indices, 'nextdown'] = last_node
+    subbasins_gdf.loc[indices, 'nextdown'] = int(last_node)
     return subbasins_gdf


@@ -660,6 +668,9 @@ def make_gages_gdf(input_csv: str, csv_dtypes: dict =None) -> gpd.GeoDataFrame:
     # Check that the CSV file includes at a minimum: id, lat, lng and that all values are appropriate
     validate(gages_df)
 
+    # Convert id to integer to ensure consistent node IDs in graph
+    gages_df['id'] = gages_df['id'].apply(ensure_int)
+
     # When a row's id matches its outlet_id, it is an outlet
     gages_df["is_outlet"] = gages_df["outlet_id"] == gages_df["id"]
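Normalizing the ids up front matters because they become node labels in the network graph, and networkx keys nodes by hash and equality: the string '5' and the integer 5 are two distinct nodes. A minimal demonstration:

import networkx as nx

G = nx.DiGraph()
G.add_node(5)
print(5 in G)    # True
print('5' in G)  # False: an id read from CSV as a string misses the node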
upstream_delineator/delineator_utils/graph_tools.py (8 changes: 8 additions & 0 deletions)

@@ -17,6 +17,10 @@ def make_river_network(df: DataFrame, terminal_node=None) -> nx.DiGraph:
 
     # Populate the graph's nodes and edges.
     for node_id, nextdown in df['nextdown'].items():
+        # Ensure node_id is an integer
+        node_id = int(node_id)
+        nextdown = int(nextdown)
+
         # Add node with comid as node ID
         G.add_node(node_id)
         G.nodes[node_id]['area'] = df.at[node_id, 'unitarea']
@@ -118,6 +122,10 @@ def insert_node(G: nx.DiGraph, node, comid) -> nx.DiGraph:
     into a unit catchment that is a "leaf" node, or one with a Strahler order 1
     or into a "stem" node, one with Strahler order > 1.
     """
+
+    # Ensure both node and comid are integers
+    node = int(node)
+    comid = int(comid)
 
     # Get the Strahler order of the node we're inserting into.
     order = G.nodes[comid]['strahler_order']
upstream_delineator/delineator_utils/util.py (70 changes: 47 additions & 23 deletions)

@@ -3,7 +3,7 @@
 import pickle
 import re
 import warnings
-from functools import cache, partial
+from functools import cache
 from typing import Union
 import requests

@@ -34,6 +34,30 @@
 simpledec = re.compile(r"\d*\.\d+")
 
 
+def ensure_int(value) -> int:
+    """
+    Safely convert a value to an integer.
+    Handles string representations of numbers as well as numeric types.
+
+    Args:
+        value: Value to convert to int (can be str, float, int, etc.)
+
+    Returns:
+        int: The converted integer value
+
+    Raises:
+        ValueError: If the value cannot be converted to a valid integer
+    """
+    try:
+        if isinstance(value, str):
+            # Try to convert string to float first (handles "1.0" -> 1)
+            return int(float(value))
+        else:
+            return int(value)
+    except (ValueError, TypeError) as e:
+        raise ValueError(f"Cannot convert '{value}' to int: {e}")
+
+
 def get_largest(input_poly: Union[MultiPolygon, Polygon]) -> Polygon:
     """
     Converts a Shapely MultiPolygon to a Shapely Polygon
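A few illustrative calls showing what ensure_int accepts (values are arbitrary):

from upstream_delineator.delineator_utils.util import ensure_int

assert ensure_int(3) == 3
assert ensure_int(3.0) == 3
assert ensure_int("3") == 3
assert ensure_int("3.0") == 3  # string floats go through float() first
# ensure_int("abc")  # raises ValueError: Cannot convert 'abc' to int: ...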
@@ -200,17 +224,17 @@ def calc_area(poly: Polygon) -> float:
     if poly.is_empty:
         return 0
 
-    projected_poly = shapely.ops.transform(
-        partial(
-            pyproj.transform,
-            pyproj.Proj(init=PROJ_WGS84),
-            pyproj.Proj(
-                proj='aea',
-                lat_1=poly.bounds[1],
-                lat_2=poly.bounds[3]
-            )
-        ),
-        poly)
+    aea_proj = pyproj.Proj(
+        proj='aea',
+        lat_1=poly.bounds[1],
+        lat_2=poly.bounds[3]
+    )
+    transformer = pyproj.Transformer.from_proj(
+        pyproj.Proj(init=PROJ_WGS84),
+        aea_proj,
+        always_xy=True
+    )
+    projected_poly = shapely.ops.transform(transformer.transform, poly)

     # Get the area in m^2, convert to km^2
     return projected_poly.area / 1e6
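Background on the refactor: newer pyproj releases deprecate the module-level pyproj.transform() (slated for removal in pyproj 3), so the functools.partial pattern is replaced with a Transformer object; always_xy=True pins the axis order to (lon, lat). A sketch of the equivalent modern setup via Transformer.from_crs, assuming PROJ_WGS84 resolves to EPSG:4326 and using made-up standard parallels:

from pyproj import CRS, Transformer

aea = CRS.from_proj4("+proj=aea +lat_1=40.0 +lat_2=41.0")
transformer = Transformer.from_crs("EPSG:4326", aea, always_xy=True)
x, y = transformer.transform(-105.0, 40.5)  # (lon, lat) in, meters out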
@@ -229,17 +253,17 @@ def calc_length(line: LineString) -> float:
     if line.is_empty:
         return 0
 
-    projected_line = shapely.ops.transform(
-        partial(
-            pyproj.transform,
-            pyproj.Proj(init=PROJ_WGS84),
-            pyproj.Proj(
-                proj='aea',
-                lat_1=line.bounds[1],
-                lat_2=line.bounds[3]
-            )
-        ),
-        line)
+    aea_proj = pyproj.Proj(
+        proj='aea',
+        lat_1=line.bounds[1],
+        lat_2=line.bounds[3]
+    )
+    transformer = pyproj.Transformer.from_proj(
+        pyproj.Proj(init=PROJ_WGS84),
+        aea_proj,
+        always_xy=True
+    )
+    projected_line = shapely.ops.transform(transformer.transform, line)

     # Get the length in m, convert to km
     return projected_line.length / 1e3
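A quick sanity check of the refactored helper, assuming PROJ_WGS84 is lon/lat WGS84: one degree of latitude is roughly 111 km, and calc_length returns kilometers.

from shapely.geometry import LineString
from upstream_delineator.delineator_utils.util import calc_length

line = LineString([(-105.0, 40.0), (-105.0, 41.0)])  # one degree of latitude
print(calc_length(line))  # ~111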