Update Irist_scanner.py

GSAPify · web-flow · commit 1e995d24f83e · 2025-03-07T10:42:53.000+05:30
diff --git a/IrisFlowerClassification Using Machine Learning/Irist_scanner.py b/IrisFlowerClassification Using Machine Learning/Irist_scanner.py
@@ -1,18 +1,84 @@
 from email.errors import FirstHeaderLineIsContinuationDefect
-
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
 import warnings
-
 from numpy.conftest import dtype
 
 warnings.simplefilter("ignore")
 
 
 #Import iris Dataset.
+df=pd.read_csv(r'C:\Users\PC\Downloads\iris\iris.data')
+
+
+# Assign column names to the dataset
+df.columns = ['SepalLength', 'SepalWidth', 'PetalLength','PetalWidth', 'Species']
+
+# Dataset Info
+df.info()
+
+# Checking for null values
+print(df.isnull().sum())
+
+# Display column names
+print(df.columns)
+
+# Unique species labels and number of duplicated rows
+print("Unique Species:", df['Species'].unique())
+print("Duplicate Rows:", df.duplicated().sum())
+
+# Pair plot of all features, coloured by species
+sns.pairplot(df,hue='Species', palette='husl')
+sns.set(style="whitegrid") #seaborn style
+plt.title("Pair Plot of Iris DataSet")
+plt.show()
+
+
+#Heatmaps
+plt.figure(figsize=(10,6))
+sns.heatmap(df.drop('Species', axis=1).corr(),annot=True,cmap='coolwarm')
+plt.title("Correlation Matrix")
+plt.show()
+
+# Box plot of each numeric feature to spot outliers
+for col in ['SepalLength', 'SepalWidth','PetalLength','PetalWidth']:
+    sns.boxplot(df[col])
+    plt.title(f'Box plot of {col}')
+    plt.show()
+
+
+#Define x and y
 
+x=df[['SepalLength', 'SepalWidth','PetalWidth','PetalLength']] #Independent Variables
+y=df['Species'] #Dependent Variable
+
+# Split the dataset into training and testing sets
+from sklearn.model_selection import train_test_split
+x_train, x_test, y_train, y_test=train_test_split (x,y,random_state=0, test_size=0.2) #20% Test Set
+
+#Check Shapes
+
+print("X_train shape:", x_train.shape)
+print("X_test shape:", x_test.shape)
+print("Y_train shape:", y_train.shape)
+print("Y_test shape:", y_test.shape)
+
+
+print(df.describe())
+from email.errors import FirstHeaderLineIsContinuationDefect
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import warnings
+from numpy.conftest import dtype
+
+warnings.simplefilter("ignore")
+
+
+#Import iris Dataset.
 df=pd.read_csv(r'C:\Users\PC\Downloads\iris\iris.data')
 
 
@@ -29,9 +95,44 @@
 print(df.columns)
 
 #Value counts for Species column
-print(df['Species'].value_counts())
+print("Unique Species:", df['Species'].unique())
+print("Duplicate Rows:", df.duplicated().sum())
 
 #Visualize the species count
 sns.pairplot(df,hue='Species', palette='husl')
+sns.set(style="whitegrid") #seaborn style
 plt.title("Pair Plot of Iris DataSet")
 plt.show()
+
+
+#Heatmaps
+plt.figure(figsize=(10,6))
+sns.heatmap(df.drop('Species', axis=1).corr(),annot=True,cmap='coolwarm')
+plt.title("Correlation Matrix")
+plt.show()
+
+# Box plot of each numeric feature to spot outliers
+for col in ['SepalLength', 'SepalWidth','PetalLength','PetalWidth']:
+    sns.boxplot(df[col])
+    plt.title(f'Box plot of {col}')
+    plt.show()
+
+
+#Define x and y
+
+x=df[['SepalLength', 'SepalWidth','PetalWidth','PetalLength']] #Independent Variables
+y=df['Species'] #Dependent Variable
+
+# Split the dataset into training and testing sets
+from sklearn.model_selection import train_test_split
+x_train, x_test, y_train, y_test=train_test_split (x,y,random_state=0, test_size=0.2) #20% Test Set
+
+#Check Shapes
+
+print("X_train shape:", x_train.shape)
+print("X_test shape:", x_test.shape)
+print("Y_train shape:", y_train.shape)
+print("Y_test shape:", y_test.shape)
+
+
+print(df.describe())