upstream_delineator/delineator_utils/consolidate.py (2 changes: 1 addition & 1 deletion)

@@ -27,7 +27,7 @@ def show_area_stats(G: nx.Graph) -> None:
     :param G:
     :return: None
     """
-    areas = [data['area'] for node, data in G.nodes(data=True)]
+    areas = [data['area'] for node, data in G.nodes(data=True) if 'area' in data]
 
     # Step 3: Calculate statistics using NumPy
     n = len(areas)
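The new guard matters because the comprehension otherwise raises KeyError on any node that lacks an 'area' attribute. A minimal sketch with toy data:

import networkx as nx

G = nx.Graph()
G.add_node(1, area=2.5)
G.add_node(2)  # hypothetical node with no 'area' attribute

# The unguarded comprehension raises KeyError: 'area' on node 2;
# the guarded version simply skips it.
areas = [data['area'] for node, data in G.nodes(data=True) if 'area' in data]
assert areas == [2.5]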
upstream_delineator/delineator_utils/delineate.py (41 changes: 26 additions & 15 deletions)

@@ -41,6 +41,7 @@
     save_network,
     validate,
     write_geodata,
+    ensure_int,
 )
 
 # Shapely throws a bunch of FutureWarnings. Safe to ignore for now, as long as we
@@ -243,7 +244,7 @@ def addnode(B: list, node_id):
     assert len(terminal_node_df) == 1, "Should only have one outlet per watershed"
     terminal_node_id = terminal_node_df.index[0]
     # The terminal comid is the unit catchment that contains (overlaps) the outlet point
-    terminal_comid = terminal_node_df['COMID'].iat[0]
+    terminal_comid = int(terminal_node_df['COMID'].iat[0])
 
     # Let upstream_comids be the list of unit catchments (and river reaches) that are in the basin
     upstream_comids = []
@@ -255,7 +256,7 @@
     # of the outlet, and therefore we cannot process them to get expected results.
     gage_list = gages_gdf.index.tolist()
     for id in gage_list:
-        comid = gages_gdf.at[id, 'COMID']
+        comid = int(gages_gdf.at[id, 'COMID'])
         if comid not in upstream_comids:
             gages_gdf.drop(id, inplace=True)
             raise Warning(f"The point with id = {id} is not contained in the watershed of the first point.")
@@ -294,11 +295,12 @@ def addnode(B: list, node_id):
 
     # For now, put the split polygon geometry into a field in `gages_gdf`
     gages_gdf['polygon'] = None
-    gages_gdf['polygon_area'] = 0
+    gages_gdf['polygon_area'] = 0.0
 
     # Iterate over the gages, and run `split_catchment()` for every gage
     for gage_id in gages_gdf.index:
-        comid = gages_gdf.at[gage_id, 'COMID']
+        gage_id = int(gage_id)
+        comid = int(gages_gdf.at[gage_id, 'COMID'])
         lat = gages_gdf.at[gage_id, 'lat']
         lng = gages_gdf.at[gage_id, 'lng']
         catchment_poly = subbasins_gdf.loc[comid, 'geometry']
@@ -319,7 +321,7 @@
     # Now, we no longer need the downstream portion of the terminal unit catchment
     # so remove its row from the subbasins GeoDataFrame
     subbasins_gdf = subbasins_gdf.drop(terminal_comid)
-    subbasins_gdf.at[terminal_node_id, 'nextdown'] = 0
+    subbasins_gdf.at[terminal_node_id, 'nextdown'] = int(0)
 
     # Create a NETWORK GRAPH of the river basin.
     if config.get("VERBOSE"): print("Creating Network GRAPH")
@@ -402,19 +404,20 @@ def addnode(B: list, node_id):
     subbasins_gdf.geometry = subbasins_gdf.geometry.apply(lambda p: close_holes(p, 0))
     subbasins_gdf.reset_index(inplace=True)
     subbasins_gdf.rename(columns={'target': 'comid'}, inplace=True)
+    subbasins_gdf['comid'] = subbasins_gdf['comid'].astype(int)
     subbasins_gdf.set_index('comid', inplace=True)
 
     # After the dissolve operation, no way to preserve correct information in column 'nextdown'
     # But it is present in the Graph, so update the GeoDataFrame subbasins_gdf with that information
     # Add column `nextdownid` and the stream orders based on data in the graph
-    subbasins_gdf['nextdown'] = 0
+    subbasins_gdf['nextdown'] = int(0)
 
     for idx in subbasins_gdf.index:
         try:
             nextdown = list(G.successors(idx))[0]
         except Exception:
             nextdown = 0
-        subbasins_gdf.at[idx, 'nextdown'] = nextdown
+        subbasins_gdf.at[idx, 'nextdown'] = int(nextdown)
 
     # Round all the areas to four decimals (just for appearances)
     subbasins_gdf['unitarea'] = subbasins_gdf['unitarea'].round(1)
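The astype(int) call and the int() wrappers above guard against pandas silently upcasting integer keys to floats or objects. A toy illustration (made-up frames, not from the diff) of how an integer column drifts:

import pandas as pd

# Concatenating a frame that has 'comid' with one that lacks it
# introduces NaN, which upcasts the whole column to float64.
merged = pd.concat([
    pd.DataFrame({'comid': [42]}),
    pd.DataFrame({'other': [1]}),
])
print(merged['comid'].dtype)                          # float64; 42 became 42.0
print(merged['comid'].dropna().astype(int).tolist())  # [42] again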
@@ -435,6 +438,7 @@ def addnode(B: list, node_id):
     myrivers_gdf = myrivers_gdf.dissolve(by="target", aggfunc=agg)
     myrivers_gdf.reset_index(inplace=True)
     myrivers_gdf.rename(columns={'target': 'comid'}, inplace=True)
+    myrivers_gdf['comid'] = myrivers_gdf['comid'].astype(int)
     myrivers_gdf.set_index('comid', inplace=True)
 
     # We can now delete the river reach segment that belonged to the downstream portion of the
@@ -446,11 +450,12 @@
 
     # Add the fields `nextdown` and the stream orders to the rivers.
     for idx in myrivers_gdf.index:
+        idx = int(idx)
         try:
             nextdown = list(G.successors(idx))[0]
         except Exception:
             nextdown = 0
-        myrivers_gdf.at[idx, 'nextdown'] = nextdown
+        myrivers_gdf.at[idx, 'nextdown'] = int(nextdown)
         myrivers_gdf.at[idx, 'strahler_order'] = G.nodes[idx]['strahler_order']
         myrivers_gdf.at[idx, 'shreve_order'] = G.nodes[idx]['shreve_order']

@@ -460,6 +465,7 @@
     subbasins_gdf['shreve_order'] = 0
 
     for idx in subbasins_gdf.index:
+        idx = int(idx)
         subbasins_gdf.at[idx, 'strahler_order'] = G.nodes[idx]['strahler_order']
         subbasins_gdf.at[idx, 'shreve_order'] = G.nodes[idx]['shreve_order']

@@ -478,8 +484,10 @@
 
     subbasins_gdf.reset_index(inplace=True)
     subbasins_gdf.rename(columns={'index': 'comid'}, inplace=True)
+    subbasins_gdf['comid'] = subbasins_gdf['comid'].astype(int)
     myrivers_gdf.reset_index(inplace=True)
     myrivers_gdf.rename(columns={'index': 'comid'}, inplace=True)
+    myrivers_gdf['comid'] = myrivers_gdf['comid'].astype(int)
 
     # The rivers data will no longer be accurate, so drop these columns
     cols = ["lengthdir", "sinuosity", "slope", "uparea", "order", "strmDrop_t", "slope_taud", "NextDownID",
@@ -527,7 +535,7 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd
     selected_rows.drop(columns=['COMID'], inplace=True)
 
     # Add the column `nextdown` to our temporary GeoDataFrame selected_rows to prevent type problems below
-    selected_rows['nextdown'] = -999
+    selected_rows['nextdown'] = int(-999)
 
     # Here is where we add the newly-created split catchments corresponding to our outlet points.
     subbasins_gdf = pd.concat([subbasins_gdf, selected_rows])
@@ -537,9 +545,9 @@
     # Add the column `nextdown`; we will populate it below
 
     for node, comid in new_nodes.items():
-        rows = subbasins_gdf['nextdown'] == comid
-        subbasins_gdf.loc[rows, 'nextdown'] = node
-        subbasins_gdf.at[node, 'nextdown'] = comid
+        rows = subbasins_gdf['nextdown'] == int(comid)
+        subbasins_gdf.loc[rows, 'nextdown'] = int(node)
+        subbasins_gdf.at[node, 'nextdown'] = int(comid)
 
         # Subtract the polygon geometry of the split catchment from its parent unit catchment
         comid_poly = subbasins_gdf.loc[comid, 'geometry']
@@ -574,7 +582,7 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd
     # This is the same as largest area to smallest area
     gages_set.sort_values(by='unitarea', inplace=True)
     # This one-liner fills in the correct network connection information for nested outlets
-    gages_set['nextdown'] = gages_set.index.to_series().shift(-1).fillna(comid)
+    gages_set['nextdown'] = gages_set.index.to_series().shift(-1).fillna(int(comid))
 
     gages_set.sort_values(by='unitarea', ascending=False, inplace=True)
     subbasins_gdf = pd.concat([subbasins_gdf, gages_set])
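The shift(-1)/fillna one-liner chains nested gages: after sorting, each gage's 'nextdown' becomes the next gage in the list, and the last gage falls back to the parent catchment. A toy run with hypothetical ids:

import pandas as pd

gages_set = pd.DataFrame(index=[101, 102, 103])  # nested gages, sorted by area
comid = 77                                       # parent unit catchment
gages_set['nextdown'] = gages_set.index.to_series().shift(-1).fillna(int(comid))
print(gages_set['nextdown'].tolist())  # [102.0, 103.0, 77.0]; note shift() upcasts to float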
@@ -633,10 +641,10 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd
 
     # After the last nested gage, we need to fix the connection info for
     # any rows that previously had the unit catchment with comid as its 'nextdown'
-    rows = subbasins_gdf['nextdown'] == comid
+    rows = subbasins_gdf['nextdown'] == int(comid)
     indices = list(rows.index[rows])
     indices.remove(first_node)
-    subbasins_gdf.loc[indices, 'nextdown'] = last_node
+    subbasins_gdf.loc[indices, 'nextdown'] = int(last_node)
     return subbasins_gdf


@@ -660,6 +668,9 @@ def make_gages_gdf(input_csv: str, csv_dtypes: dict =None) -> gpd.GeoDataFrame:
     # Check that the CSV file includes at a minimum: id, lat, lng and that all values are appropriate
     validate(gages_df)
 
+    # Convert id to integer to ensure consistent node IDs in graph
+    gages_df['id'] = gages_df['id'].apply(ensure_int)
+
     # When a row's id matches its outlet_id, it is an outlet
     gages_df["is_outlet"] = gages_df["outlet_id"] == gages_df["id"]
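Normalizing the ids up front matters because they become node labels in the network graph, and networkx keys nodes by hash and equality: the string '5' and the integer 5 are two distinct nodes. A minimal demonstration:

import networkx as nx

G = nx.DiGraph()
G.add_node(5)
print(5 in G)    # True
print('5' in G)  # False: an id read from CSV as a string misses the node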
upstream_delineator/delineator_utils/graph_tools.py (8 changes: 8 additions & 0 deletions)

@@ -17,6 +17,10 @@ def make_river_network(df: DataFrame, terminal_node=None) -> nx.DiGraph:
 
     # Populate the graph's nodes and edges.
     for node_id, nextdown in df['nextdown'].items():
+        # Ensure node_id is an integer
+        node_id = int(node_id)
+        nextdown = int(nextdown)
+
         # Add node with comid as node ID
         G.add_node(node_id)
         G.nodes[node_id]['area'] = df.at[node_id, 'unitarea']
@@ -118,6 +122,10 @@ def insert_node(G: nx.DiGraph, node, comid) -> nx.DiGraph:
     into a unit catchment that is a "leaf" node, or one with a Strahler order 1
     or into a "stem" node, one with Strahler order > 1.
     """
+
+    # Ensure both node and comid are integers
+    node = int(node)
+    comid = int(comid)
 
     # Get the Strahler order of the node we're inserting into.
     order = G.nodes[comid]['strahler_order']
upstream_delineator/delineator_utils/util.py (70 changes: 47 additions & 23 deletions)

@@ -3,7 +3,7 @@
 import pickle
 import re
 import warnings
-from functools import cache, partial
+from functools import cache
 from typing import Union
 import requests

@@ -34,6 +34,30 @@
 simpledec = re.compile(r"\d*\.\d+")
 
 
+def ensure_int(value) -> int:
+    """
+    Safely convert a value to an integer.
+    Handles string representations of numbers as well as numeric types.
+
+    Args:
+        value: Value to convert to int (can be str, float, int, etc.)
+
+    Returns:
+        int: The converted integer value
+
+    Raises:
+        ValueError: If the value cannot be converted to a valid integer
+    """
+    try:
+        if isinstance(value, str):
+            # Try to convert string to float first (handles "1.0" -> 1)
+            return int(float(value))
+        else:
+            return int(value)
+    except (ValueError, TypeError) as e:
+        raise ValueError(f"Cannot convert '{value}' to int: {e}")
+
+
 def get_largest(input_poly: Union[MultiPolygon, Polygon]) -> Polygon:
     """
     Converts a Shapely MultiPolygon to a Shapely Polygon
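A few illustrative calls showing what ensure_int accepts (values are arbitrary):

from upstream_delineator.delineator_utils.util import ensure_int

assert ensure_int(3) == 3
assert ensure_int(3.0) == 3
assert ensure_int("3") == 3
assert ensure_int("3.0") == 3  # string floats go through float() first
# ensure_int("abc")  # raises ValueError: Cannot convert 'abc' to int: ...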
@@ -200,17 +224,17 @@ def calc_area(poly: Polygon) -> float:
     if poly.is_empty:
         return 0
 
-    projected_poly = shapely.ops.transform(
-        partial(
-            pyproj.transform,
-            pyproj.Proj(init=PROJ_WGS84),
-            pyproj.Proj(
-                proj='aea',
-                lat_1=poly.bounds[1],
-                lat_2=poly.bounds[3]
-            )
-        ),
-        poly)
+    aea_proj = pyproj.Proj(
+        proj='aea',
+        lat_1=poly.bounds[1],
+        lat_2=poly.bounds[3]
+    )
+    transformer = pyproj.Transformer.from_proj(
+        pyproj.Proj(init=PROJ_WGS84),
+        aea_proj,
+        always_xy=True
+    )
+    projected_poly = shapely.ops.transform(transformer.transform, poly)

     # Get the area in m^2, convert to km^2
     return projected_poly.area / 1e6
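Background on the refactor: newer pyproj releases deprecate the module-level pyproj.transform() (slated for removal in pyproj 3), so the functools.partial pattern is replaced with a Transformer object; always_xy=True pins the axis order to (lon, lat). A sketch of the equivalent modern setup via Transformer.from_crs, assuming PROJ_WGS84 resolves to EPSG:4326 and using made-up standard parallels:

from pyproj import CRS, Transformer

aea = CRS.from_proj4("+proj=aea +lat_1=40.0 +lat_2=41.0")
transformer = Transformer.from_crs("EPSG:4326", aea, always_xy=True)
x, y = transformer.transform(-105.0, 40.5)  # (lon, lat) in, meters out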
@@ -229,17 +253,17 @@ def calc_length(line: LineString) -> float:
     if line.is_empty:
         return 0
 
-    projected_line = shapely.ops.transform(
-        partial(
-            pyproj.transform,
-            pyproj.Proj(init=PROJ_WGS84),
-            pyproj.Proj(
-                proj='aea',
-                lat_1=line.bounds[1],
-                lat_2=line.bounds[3]
-            )
-        ),
-        line)
+    aea_proj = pyproj.Proj(
+        proj='aea',
+        lat_1=line.bounds[1],
+        lat_2=line.bounds[3]
+    )
+    transformer = pyproj.Transformer.from_proj(
+        pyproj.Proj(init=PROJ_WGS84),
+        aea_proj,
+        always_xy=True
+    )
+    projected_line = shapely.ops.transform(transformer.transform, line)

     # Get the length in m, convert to km
     return projected_line.length / 1e3
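A quick sanity check of the refactored helper, assuming PROJ_WGS84 is lon/lat WGS84: one degree of latitude is roughly 111 km, and calc_length returns kilometers.

from shapely.geometry import LineString
from upstream_delineator.delineator_utils.util import calc_length

line = LineString([(-105.0, 40.0), (-105.0, 41.0)])  # one degree of latitude
print(calc_length(line))  # ~111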