-
Notifications
You must be signed in to change notification settings - Fork 0
Advanced Examples
automation edited this page Aug 8, 2025 · 1 revision
import pandas as pd
from sklearn.datasets import fetch_california_housing
from statclean import StatClean
# --- Example 1: multi-column cleaning on the California housing data ---
# Put the feature matrix and the regression target into a single DataFrame
# so the target ('PRICE') can be cleaned alongside the features.
housing = fetch_california_housing()
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df['PRICE'] = housing.target
# NOTE(review): preserve_index=True presumably keeps the original row labels
# after rows are dropped -- confirm against the StatClean docs.
cleaner = StatClean(df, preserve_index=True)

# Analyze & clean selected features
features = ['MedInc', 'AveRooms', 'PRICE']
cleaned_df, info = cleaner.clean_columns(features, method='auto', show_progress=True)

# Multivariate check over the same columns that were cleaned above
mv_outliers = cleaner.detect_outliers_mahalanobis(features, chi2_threshold=0.975)
# NOTE(review): .sum() is used as an outlier count, which assumes the result
# is a boolean mask -- confirm.
print('Multivariate outliers:', mv_outliers.sum())

# Visualization grid
figs = cleaner.plot_outlier_analysis(features)

# --- Example 2: modified Z-score workflow on a single column ---
# Detect, then remove, then visualize outliers in 'PRICE'. Each call is its
# own statement on its own line.
outliers = cleaner.detect_outliers_modified_zscore('PRICE')
cleaner.remove_outliers_modified_zscore('PRICE')
cleaner.visualize_outliers('PRICE')