6 changes: 6 additions & 0 deletions bluemath_tk/core/decorators.py
@@ -120,6 +120,7 @@ def wrapper(
directional_variables: List[str] = [],
custom_scale_factor: dict = {},
min_number_of_points: int = None,
max_number_of_iterations: int = 10,
normalize_data: bool = True,
):
if data is None:
@@ -133,6 +134,11 @@ def wrapper(
if min_number_of_points is not None:
if not isinstance(min_number_of_points, int) or min_number_of_points <= 0:
raise ValueError("Minimum number of points must be integer and > 0")
if (
not isinstance(max_number_of_iterations, int)
or max_number_of_iterations <= 0
):
raise ValueError("Maximum number of iterations must be integer and > 0")
if not isinstance(normalize_data, bool):
raise TypeError("Normalize data must be a boolean")
return func(
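The new guard mirrors the existing `min_number_of_points` check. A minimal standalone sketch of the behavior (the function below is illustrative, not the library's actual decorator):

```python
# Illustrative restatement of the guard added above, in isolation.
def validate_max_iterations(max_number_of_iterations: int) -> None:
    if (
        not isinstance(max_number_of_iterations, int)
        or max_number_of_iterations <= 0
    ):
        raise ValueError("Maximum number of iterations must be integer and > 0")

validate_max_iterations(10)  # OK
validate_max_iterations(0)   # raises ValueError
```

One caveat worth noting: `bool` is a subclass of `int` in Python, so `max_number_of_iterations=True` would pass this check, as it would the existing `min_number_of_points` one.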
16 changes: 14 additions & 2 deletions bluemath_tk/datamining/kma.py
@@ -182,6 +182,7 @@ def fit(
directional_variables: List[str] = [],
custom_scale_factor: dict = {},
min_number_of_points: int = None,
max_number_of_iterations: int = 10,
normalize_data: bool = True,
) -> None:
"""
@@ -206,6 +207,10 @@
min_number_of_points : int, optional
The minimum number of points to consider a cluster.
Default is None.
max_number_of_iterations : int, optional
The maximum number of attempts to find a stable K-Means configuration.
Only used when min_number_of_points is not None.
Default is 10.
normalize_data : bool, optional
A flag to normalize the data. Default is True.
"""
@@ -248,9 +253,10 @@
if np.all(counts >= min_number_of_points):
stable_kma_child = True
number_of_tries += 1
if number_of_tries > 10:
if number_of_tries > max_number_of_iterations:
raise ValueError(
"Failed to find a stable K-Means configuration after 10 attempts."
f"Failed to find a stable K-Means configuration after {max_number_of_iterations} attempts."
"Change max_number_of_iterations or min_number_of_points."
)
self.logger.info(
f"Found a stable K-Means configuration after {number_of_tries} attempts."
@@ -318,6 +324,7 @@ def fit_predict(
directional_variables: List[str] = [],
custom_scale_factor: dict = {},
min_number_of_points: int = None,
max_number_of_iterations: int = 10,
normalize_data: bool = True,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""
@@ -337,6 +344,10 @@
min_number_of_points : int, optional
The minimum number of points to consider a cluster.
Default is None.
max_number_of_iterations : int, optional
The maximum number of attempts to find a stable K-Means configuration.
Only used when min_number_of_points is not None.
Default is 10.
normalize_data : bool, optional
A flag to normalize the data. Default is True.

@@ -352,6 +363,7 @@
directional_variables=directional_variables,
custom_scale_factor=custom_scale_factor,
min_number_of_points=min_number_of_points,
max_number_of_iterations=max_number_of_iterations,
normalize_data=normalize_data,
)

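A typical call with the new knob (the `KMA` constructor arguments are assumed for illustration; they are not part of this diff):

```python
import numpy as np
import pandas as pd
from bluemath_tk.datamining.kma import KMA

# Toy data for illustration.
data = pd.DataFrame({"hs": np.random.rand(500), "tp": 10.0 * np.random.rand(500)})

kma = KMA(num_clusters=8)  # constructor signature assumed, not shown in this diff
bmus, bmus_df = kma.fit_predict(
    data=data,
    min_number_of_points=20,      # every cluster must keep >= 20 members
    max_number_of_iterations=25,  # previously a hard-coded 10
)
```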
2 changes: 2 additions & 0 deletions bluemath_tk/predictor/xwt.py
@@ -286,6 +286,8 @@ def fit(

kma: KMA = self.steps.get("kma")
self.num_clusters = kma.num_clusters
# TODO: standardize PCs by first PC variance
# pca.pcs_df / pca.pcs.stds.isel(n_component=0).values
kma_bmus, _kma_bmus_df = kma.fit_predict(
data=pca.pcs_df,
**fit_params.get("kma", {}),
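The TODO sketches dividing all PCs by the spread of the first PC before clustering, so later components are weighted relative to it. Written out from the expression in the comment, in the context of the `fit` method above (the `pca.pcs_df` and `pca.pcs.stds` attributes are taken from that comment, not verified against the PCA class):

```python
# Possible shape of the TODO above, built only from the commented expression.
first_pc_std = pca.pcs.stds.isel(n_component=0).values
standardized_pcs = pca.pcs_df / first_pc_std

kma_bmus, _kma_bmus_df = kma.fit_predict(
    data=standardized_pcs,
    **fit_params.get("kma", {}),
)
```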
29 changes: 19 additions & 10 deletions bluemath_tk/topo_bathy/swan_grid.py
@@ -23,17 +23,26 @@ def generate_grid_parameters(bathy_data: xr.DataArray) -> dict:
"""

return {
"xpc": np.nanmin(bathy_data.lon), # x origin
"ypc": np.nanmin(bathy_data.lat), # y origin
"xpc": int(np.nanmin(bathy_data.lon)), # x origin
"ypc": int(np.nanmin(bathy_data.lat)), # y origin
"alpc": 0, # x-axis direction
"xlenc": np.nanmax(bathy_data.lon)
- np.nanmin(bathy_data.lon), # grid length in x
"ylenc": np.nanmax(bathy_data.lat)
- np.nanmin(bathy_data.lat), # grid length in y
"xlenc": int(
np.nanmax(bathy_data.lon) - np.nanmin(bathy_data.lon)
), # grid length in x
"ylenc": int(
np.nanmax(bathy_data.lat) - np.nanmin(bathy_data.lat)
), # grid length in y
"mxc": len(bathy_data.lon) - 1, # number mesh x, una menos pq si no SWAN peta
"myc": len(bathy_data.lat) - 1, # number mesh y, una menos pq si no SWAN peta
"dxinp": bathy_data.lon[1].values
- bathy_data.lon[0].values, # size mesh x (resolution in x)
"dyinp": bathy_data.lat[1].values
- bathy_data.lat[0].values, # size mesh y (resolution in y)
"xpinp": np.nanmin(bathy_data.lon), # x origin
"ypinp": np.nanmin(bathy_data.lat), # y origin
"alpinp": 0, # x-axis direction
"mxinp": len(bathy_data.lon) - 1, # number mesh x
"myinp": len(bathy_data.lat) - 1, # number mesh y
"dxinp": abs(
bathy_data.lon[1].values - bathy_data.lon[0].values
), # size mesh x (resolution in x)
"dyinp": abs(
bathy_data.lat[1].values - bathy_data.lat[0].values
), # size mesh y (resolution in y)
}
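For a quick sanity check of the revised function, a toy regular grid (values are illustrative):

```python
import numpy as np
import xarray as xr
from bluemath_tk.topo_bathy.swan_grid import generate_grid_parameters

# Toy 1-degree bathymetry: 6 longitude x 4 latitude points.
lon = np.arange(-10.0, -4.0, 1.0)
lat = np.arange(43.0, 47.0, 1.0)
bathy = xr.DataArray(
    100.0 * np.random.rand(len(lat), len(lon)),
    coords={"lat": lat, "lon": lon},
    dims=("lat", "lon"),
)

params = generate_grid_parameters(bathy)
# params["xpc"] == -10, params["mxc"] == 5 (one fewer than the lon points),
# params["dxinp"] == 1.0 regardless of coordinate ordering, thanks to abs().
```

Note that the `int()` casts truncate toward zero, so a non-integer origin such as 43.5 is written as 43; that is fine for whole-degree grids but worth keeping in mind.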
19 changes: 9 additions & 10 deletions bluemath_tk/waves/binwaves.py
@@ -110,12 +110,11 @@ def process_kp_coefficients(
print(f"Error processing {input_spec_file} and {output_spec_file}")
print(e)

return (
xr.concat(output_kp_list, dim="case_num")
.fillna(0.0)
.sortby("freq")
.sortby("dir")
)
# Concat files one by one
concatened_kp = output_kp_list[0]
for file in output_kp_list[1:]:
concatened_kp = xr.concat([concatened_kp, file], dim="case_num")
return concatened_kp.fillna(0.0).sortby("freq").sortby("dir")
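The one-shot `xr.concat` over the whole list is replaced by pairwise concatenation. Both forms yield the same result on well-formed inputs; a toy check (dataset contents are made up):

```python
import numpy as np
import xarray as xr

# Toy stand-ins for the per-case kp datasets.
cases = [
    xr.Dataset({"kp": ("freq", np.random.rand(3))}, coords={"freq": [0.1, 0.2, 0.3]})
    for _ in range(4)
]

# One-shot form (previous code):
one_shot = xr.concat(cases, dim="case_num")

# Incremental form (new code): concatenate one dataset at a time.
step_by_step = cases[0]
for ds in cases[1:]:
    step_by_step = xr.concat([step_by_step, ds], dim="case_num")

assert one_shot.equals(step_by_step)
```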


def reconstruc_spectra(
@@ -149,15 +148,15 @@ def reconstruc_spectra(

# Setup Dask client
if num_workers is None:
num_workers = os.environ.get("BLUEMATH_NUM_WORKERS", 2)
num_workers = os.environ.get("BLUEMATH_NUM_WORKERS", 4)
client = setup_dask_client(n_workers=num_workers, memory_limit=memory_limit)

try:
# Process with controlled chunks
offshore_spectra_chunked = offshore_spectra.chunk(
{"time": chunk_sizes.get("time", 24)}
{"time": chunk_sizes.get("time", 24 * 7)}
)
kp_coeffs_chunked = kp_coeffs.chunk({"site": 1})
kp_coeffs_chunked = kp_coeffs.chunk({"site": 10})
with ProgressBar():
onshore_spectra = (
(offshore_spectra_chunked * kp_coeffs_chunked).sum(dim="case_num")
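The core reduction being chunked here is a broadcast multiply followed by a sum over `case_num`. A toy version with made-up shapes (names mirror the code above; requires dask):

```python
import numpy as np
import xarray as xr

# 100 hourly offshore spectra across 5 binwaves cases; kp for 3 sites.
offshore = xr.DataArray(np.random.rand(100, 5), dims=("time", "case_num")).chunk({"time": 24})
kp = xr.DataArray(np.random.rand(3, 5), dims=("site", "case_num")).chunk({"site": 1})

# Weight each case by its kp coefficient, then collapse the shared dimension.
onshore = (offshore * kp).sum(dim="case_num")  # dims (time, site), computed lazily
print(onshore.compute().shape)  # (100, 3)
```

Larger `time` chunks (the new default is a week of hourly data, `24 * 7`) mean fewer, bigger Dask tasks, and `site` blocks of 10 do the same on the kp side.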
@@ -250,7 +249,7 @@ def plot_selected_cases_grid(
ax = fig.add_subplot(1, 1, 1, projection="polar")

# prepare data
x = np.append(np.deg2rad(directions - 7.5), np.deg2rad(directions - 7.5)[0])
x = np.append(np.deg2rad(directions), np.deg2rad(directions)[0])
y = np.append(0, frequencies)
z = (
np.array(range(len(frequencies) * len(directions)))