|
1 | 1 | from email.errors import FirstHeaderLineIsContinuationDefect |
2 | | - |
3 | 2 | import pandas as pd |
4 | 3 | import numpy as np |
5 | 4 | import matplotlib.pyplot as plt |
6 | 5 | import seaborn as sns |
7 | 6 | import warnings |
8 | | - |
9 | 7 | from numpy.conftest import dtype |
10 | 8 |
|
11 | 9 | warnings.simplefilter("ignore") |
12 | 10 |
|
13 | 11 |
|
14 | 12 | #Import iris Dataset. |
| 13 | +df=pd.read_csv(r'C:\Users\PC\Downloads\iris\iris.data') |
| 14 | + |
| 15 | + |
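| 16 | +# Assign column names to the dataset. NOTE(review): read_csv above uses the file's first row as the header by default - if iris.data has no header row, pass header=None (or names=[...]) so the first record is not lost. |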
| 17 | +df.columns = ['SepalLength', 'SepalWidth', 'PetalLength','PetalWidth', 'Species'] |
| 18 | + |
| 19 | +# Dataset Info |
| 20 | +df.info() |
| 21 | + |
| 22 | +# Checking for null values |
| 23 | +print(df.isnull().sum()) |
| 24 | + |
| 25 | +# Display column names |
| 26 | +print(df.columns) |
| 27 | + |
| 28 | +#Unique species labels and duplicate-row count |
| 29 | +print("Unique Species:", df['Species'].unique()) |
| 30 | +print("Duplicate Rows:", df.duplicated().sum()) |
| 31 | + |
| 32 | +#Pair plot of all features, coloured by species |
| 33 | +sns.pairplot(df,hue='Species', palette='husl') |
| 34 | +sns.set(style="whitegrid") #seaborn style |
| 35 | +plt.title("Pair Plot of Iris DataSet") |
| 36 | +plt.show() |
| 37 | + |
| 38 | + |
| 39 | +#Correlation heatmap of the numeric features |
| 40 | +plt.figure(figsize=(10,6)) |
| 41 | +sns.heatmap(df.drop('Species', axis=1).corr(),annot=True,cmap='coolwarm') |
| 42 | +plt.title("Correlation Matrix") |
| 43 | +plt.show() |
| 44 | + |
| 45 | +#Box plot of each feature to check for outliers |
| 46 | +for col in ['SepalLength', 'SepalWidth','PetalLength','PetalWidth']: |
| 47 | + sns.boxplot(df[col]) |
| 48 | + plt.title(f'Box plot of {col}') |
| 49 | + plt.show() |
| 50 | + |
| 51 | + |
| 52 | +#Define x and y |
15 | 53 |
|
| 54 | +x=df[['SepalLength', 'SepalWidth','PetalWidth','PetalLength']] #Independent Variables |
| 55 | +y=df['Species'] #Dependent Variable |
| 56 | + |
| 57 | +# Split the dataset into training and testing sets |
| 58 | +from sklearn.model_selection import train_test_split |
| 59 | +x_train, x_test, y_train, y_test=train_test_split (x,y,random_state=0, test_size=0.2) #20% Test Set |
| 60 | + |
| 61 | +#Check Shapes |
| 62 | + |
| 63 | +print("X_train shape:", x_train.shape) |
| 64 | +print("X_test shape:", x_test.shape) |
| 65 | +print("Y_train shape:", y_train.shape) |
| 66 | +print("Y_test shape:", y_test.shape) |
| 67 | + |
| 68 | + |
| 69 | +print(df.describe()) |
| 70 | +from email.errors import FirstHeaderLineIsContinuationDefect |
| 71 | +import pandas as pd |
| 72 | +import numpy as np |
| 73 | +import matplotlib.pyplot as plt |
| 74 | +import seaborn as sns |
| 75 | +import warnings |
| 76 | +from numpy.conftest import dtype |
| 77 | + |
| 78 | +warnings.simplefilter("ignore") |
| 79 | + |
| 80 | + |
| 81 | +#Import iris Dataset. |
16 | 82 | df=pd.read_csv(r'C:\Users\PC\Downloads\iris\iris.data') |
17 | 83 |
|
18 | 84 |
|
|
29 | 95 | print(df.columns) |
30 | 96 |
|
31 | 97 | #Unique species labels and duplicate-row count |
32 | | -print(df['Species'].value_counts()) |
| 98 | +print("Unique Species:", df['Species'].unique()) |
| 99 | +print("Duplicate Rows:", df.duplicated().sum()) |
33 | 100 |
|
34 | 101 | #Pair plot of all features, coloured by species |
35 | 102 | sns.pairplot(df,hue='Species', palette='husl') |
| 103 | +sns.set(style="whitegrid") #seaborn style |
36 | 104 | plt.title("Pair Plot of Iris DataSet") |
37 | 105 | plt.show() |
| 106 | + |
| 107 | + |
| 108 | +#Correlation heatmap of the numeric features |
| 109 | +plt.figure(figsize=(10,6)) |
| 110 | +sns.heatmap(df.drop('Species', axis=1).corr(),annot=True,cmap='coolwarm') |
| 111 | +plt.title("Correlation Matrix") |
| 112 | +plt.show() |
| 113 | + |
| 114 | +#Box plot of each feature to check for outliers |
| 115 | +for col in ['SepalLength', 'SepalWidth','PetalLength','PetalWidth']: |
| 116 | + sns.boxplot(df[col]) |
| 117 | + plt.title(f'Box plot of {col}') |
| 118 | + plt.show() |
| 119 | + |
| 120 | + |
| 121 | +#Define x and y |
| 122 | + |
| 123 | +x=df[['SepalLength', 'SepalWidth','PetalWidth','PetalLength']] #Independent Variables |
| 124 | +y=df['Species'] #Dependent Variable |
| 125 | + |
| 126 | +# Split the dataset into training and testing sets |
| 127 | +from sklearn.model_selection import train_test_split |
| 128 | +x_train, x_test, y_train, y_test=train_test_split (x,y,random_state=0, test_size=0.2) #20% Test Set |
| 129 | + |
| 130 | +#Check Shapes |
| 131 | + |
| 132 | +print("X_train shape:", x_train.shape) |
| 133 | +print("X_test shape:", x_test.shape) |
| 134 | +print("Y_train shape:", y_train.shape) |
| 135 | +print("Y_test shape:", y_test.shape) |
| 136 | + |
| 137 | + |
| 138 | +print(df.describe()) |
0 commit comments