This repository was archived by the owner on Jan 8, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathquick_start.py
More file actions
140 lines (109 loc) · 4.48 KB
/
quick_start.py
File metadata and controls
140 lines (109 loc) · 4.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python3
"""
Quick Start Script for Datasets Repository
This script provides a quick demonstration of loading and exploring all datasets
in the repository. Run this to get started quickly!
Usage:
python quick_start.py
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
def main():
    """Print an overview of every CSV dataset in the current directory.

    Globs ``*.csv`` in the working directory, lists the file names, and
    passes the list to ``explore_datasets`` for per-file exploration. When
    no CSV file is present, prints a hint and returns without exploring
    anything.

    Returns:
        None. All output is written to stdout.
    """
    print("๐ Datasets Repository Quick Start")
    print("=" * 50)

    # Check if datasets exist. Glob order depends on the filesystem, so sort
    # to keep the listing (and the order handed on for exploration) stable
    # from one run to the next.
    datasets_path = Path(".")
    csv_files = sorted(datasets_path.glob("*.csv"))

    if not csv_files:
        print("โ No CSV files found in current directory!")
        print("Please run this script from the datasets directory.")
        return

    print(f"๐ Found {len(csv_files)} datasets:")
    for file in csv_files:
        print(f" - {file.name}")

    print("\n" + "=" * 50)

    # Load and explore each dataset
    explore_datasets(csv_files)

    # This literal must stay on a single line: split over two lines it is an
    # unterminated string and the whole module fails to compile.
    print("\n✅ Quick start completed!")
    print("๐ Check the docs/ folder for detailed examples and usage patterns.")
def explore_datasets(csv_files):
    """Load each CSV in *csv_files* and print a short profile of it.

    For every path the function prints a numbered header, then the frame's
    shape, deep memory usage in MB, counts of numeric and object-dtype
    columns, the total number of missing cells, the first three rows,
    ``describe()`` statistics for numeric columns, and a value sample for up
    to three object-dtype columns. Any exception raised while loading or
    profiling a file is reported on stdout and the loop moves on.

    Args:
        csv_files: Iterable of path objects exposing ``.name`` and accepted
            by ``pandas.read_csv``.

    Returns:
        None. All output is written to stdout.
    """
    for position, path in enumerate(csv_files, start=1):
        print(f"\n๐ Dataset {position}: {path.name}")
        print("-" * 40)

        try:
            frame = pd.read_csv(path)

            # Dimensions and in-memory footprint
            print(f"Shape: {frame.shape}")
            megabytes = frame.memory_usage(deep=True).sum() / 1024 / 1024
            print(f"Memory usage: {megabytes:.2f} MB")

            # Split columns by dtype family
            number_columns = frame.select_dtypes(include=[np.number]).columns
            object_columns = frame.select_dtypes(include=['object']).columns
            print(f"Numerical columns: {len(number_columns)}")
            print(f"Categorical columns: {len(object_columns)}")

            # Total count of null cells across the whole frame
            null_total = frame.isnull().sum().sum()
            null_label = null_total if null_total > 0 else "None"
            print(f"Missing values: {null_label}")

            # Preview
            print("\nFirst 3 rows:")
            print(frame.head(3).to_string())

            # Summary statistics, only when there is something numeric
            if len(number_columns):
                print("\nNumerical columns statistics:")
                print(frame[number_columns].describe().round(2))

            # Object columns: show counts for small domains, otherwise just
            # the cardinality. Only the first three columns are sampled.
            if len(object_columns):
                print("\nCategorical columns sample values:")
                for column in object_columns[:3]:
                    distinct = frame[column].nunique()
                    if distinct > 10:
                        print(f"{column}: {distinct} unique values")
                    else:
                        top_counts = frame[column].value_counts().head()
                        print(f"{column}: {top_counts.to_dict()}")

        except Exception as exc:
            print(f"โ Error loading {path.name}: {exc!s}")

        print("-" * 40)
def show_dataset_summary():
    """Print a one-row-per-file summary table of the CSV datasets.

    Scans the current working directory for ``*.csv`` files and, for each
    one that loads, records its name, row and column counts, on-disk size in
    MB, the number of numeric and object-dtype columns, and the total count
    of missing cells. Files that cannot be read are reported and skipped so
    one bad file does not hide the rest of the table.

    Returns:
        None. The table is written to stdout.
    """
    print("\n๐ Dataset Summary Table")
    print("=" * 80)

    summary_data = []
    # Sorted so the row order does not depend on filesystem listing order.
    for csv_file in sorted(Path(".").glob("*.csv")):
        try:
            df = pd.read_csv(csv_file)
            file_size = csv_file.stat().st_size / 1024 / 1024  # MB

            summary_data.append({
                'Dataset': csv_file.name,
                'Rows': df.shape[0],
                'Columns': df.shape[1],
                'Size (MB)': f"{file_size:.2f}",
                'Numerical': len(df.select_dtypes(include=[np.number]).columns),
                'Categorical': len(df.select_dtypes(include=['object']).columns),
                'Missing Values': df.isnull().sum().sum(),
            })
        except Exception as exc:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt/SystemExit still propagate, and say which
            # file was dropped instead of skipping it silently.
            print(f"Skipping {csv_file.name}: {exc}")
            continue

    if summary_data:
        summary_df = pd.DataFrame(summary_data)
        print(summary_df.to_string(index=False))

    print("=" * 80)
if __name__ == "__main__":
    # Script entry point: run the walkthrough first, then the summary table.
    # Ctrl-C gets a short notice; any other failure is reported together
    # with a reminder about installing the project's requirements.
    try:
        for step in (main, show_dataset_summary):
            step()
    except KeyboardInterrupt:
        print("\n\nโน๏ธ Script interrupted by user")
    except Exception as e:
        print(
            f"\nโ Unexpected error: {str(e)}",
            "Please check that all required packages are installed:",
            "pip install -r requirements.txt",
            sep="\n",
        )