Skip to content

Advanced Examples

automation edited this page Aug 8, 2025 · 1 revision

Advanced Examples

California Housing End-to-End

import pandas as pd
from sklearn.datasets import fetch_california_housing
from statclean import StatClean

# Load the California housing dataset and build a DataFrame from its
# feature matrix, using the dataset's own feature names as column labels.
housing = fetch_california_housing()
df = pd.DataFrame(housing.data, columns=housing.feature_names)
# Attach the dataset's target values as an extra 'PRICE' column so they can
# be cleaned alongside the features.
df['PRICE'] = housing.target

# Wrap the DataFrame in a StatClean instance; every call below goes through
# this one object.
# NOTE(review): preserve_index=True presumably keeps the original row index
# through cleaning — confirm against the StatClean docs.
cleaner = StatClean(df, preserve_index=True)

# Analyze & clean selected features
features = ['MedInc', 'AveRooms', 'PRICE']
# clean_columns returns two values, unpacked here as the cleaned frame and an
# accompanying info object.
# NOTE(review): method='auto' presumably lets StatClean pick the detection
# method per column — confirm.
cleaned_df, info = cleaner.clean_columns(features, method='auto', show_progress=True)

# Multivariate check
# Runs on the same three columns listed in `features`, and is called after
# clean_columns above.
# NOTE(review): chi2_threshold=0.975 looks like a chi-square percentile rather
# than a raw distance cutoff — confirm against the StatClean docs.
mv_outliers = cleaner.detect_outliers_mahalanobis(['MedInc', 'AveRooms', 'PRICE'], chi2_threshold=0.975)
# NOTE(review): .sum() presumably counts flagged rows, assuming the result is
# a boolean mask — verify.
print('Multivariate outliers:', mv_outliers.sum())

# Visualization grid
# The return value is kept in `figs`; nothing in this example uses it afterwards.
figs = cleaner.plot_outlier_analysis(features)

Modified Z-score Visualization

# Reuses the `cleaner` object built in the California Housing example.
# Detect outliers in 'PRICE' with the modified Z-score detector. The result is
# kept in `outliers` for inspection; the two calls below do not use it.
outliers = cleaner.detect_outliers_modified_zscore('PRICE')
# NOTE(review): the return value is discarded, so this presumably updates the
# cleaner's internal data in place — confirm against the StatClean docs.
cleaner.remove_outliers_modified_zscore('PRICE')
# NOTE(review): this runs after the removal step, so the plot presumably shows
# the already-cleaned 'PRICE' column; move it before the removal if the intent
# is to show the outliers that were dropped.
cleaner.visualize_outliers('PRICE')

Back to top

Clone this wiki locally