Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sdmetrics/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ def _generate_cardinality_plot(
if plot_type == 'bar':
max_cardinality = _get_max_between_datasets(real_data, synthetic_data)
min_cardinality = _get_min_between_datasets(real_data, synthetic_data)
plot_kwargs = {'nbins': max_cardinality - min_cardinality + 1}
plot_kwargs = {'nbins': int(max_cardinality - min_cardinality + 1)}

return _generate_column_plot(
real_data, synthetic_data, plot_type, plot_kwargs, plot_title, x_label
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/test_visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import pytest

from sdmetrics.demos import load_demo
from sdmetrics.reports.utils import PlotConfig
from sdmetrics.visualization import (
_generate_box_plot,
Expand Down Expand Up @@ -354,6 +355,33 @@ def test_get_cardinality_plot_bad_plot_type():
)


def test_get_cardinality_plot_with_string_id_columns():
    """Check ``get_cardinality_plot`` returns a figure when key columns are ``string`` dtype.

    The ``user_id`` column of both the ``users`` and ``sessions`` tables is cast to
    the pandas ``string`` dtype in the real and the synthetic demo data, and a
    ``bar`` cardinality plot is then requested for that relationship.
    """
    # Setup
    real_data, synthetic_data, _metadata = load_demo(modality='multi_table')

    # Cast the primary/foreign key of the users-sessions relationship to string
    # in every dataset (real first, then synthetic; parent table before child).
    for dataset in (real_data, synthetic_data):
        for table_name in ('users', 'sessions'):
            key_column = dataset[table_name]['user_id']
            dataset[table_name]['user_id'] = key_column.astype('string')

    plot_arguments = {
        'real_data': real_data,
        'synthetic_data': synthetic_data,
        'parent_table_name': 'users',
        'child_table_name': 'sessions',
        'parent_primary_key': 'user_id',
        'child_foreign_key': 'user_id',
        'plot_type': 'bar',
    }

    # Run
    figure = get_cardinality_plot(**plot_arguments)

    # Assert
    assert figure is not None


def test_get_column_plot_column_not_found():
"""Test the ``get_column_plot`` method when column is not present."""
# Setup
Expand Down