In [5]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
In [6]:
# Load the dataset
df = pd.read_csv("Iris.csv") # Make sure Iris.csv is in the same folder

# Drop the 'Id' column (a row counter — carries no predictive signal)
df = df.drop(columns=['Id'])

# Convert the 'Species' column (string labels) into integer codes using LabelEncoder
label_encoder = LabelEncoder()
df['Species'] = label_encoder.fit_transform(df['Species'])

# Separate features (X) and target (y)
X = df.drop(columns=['Species']) # Input features (4 measurement columns)
y = df['Species'] # Target label (encoded species)

# Split the data into training and testing parts
# 70% training, 30% testing (test_size=0.3); random_state fixed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale the features using StandardScaler (important for KNN, which is distance-based).
# Fit on the training data only, then apply the same transform to the test data
# to avoid leaking test-set statistics into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the KNN model
knn = KNeighborsClassifier(n_neighbors=3) # Try k=3
knn.fit(X_train_scaled, y_train)

# Predict the labels for the test set
y_pred = knn.predict(X_test_scaled)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
Accuracy: 1.0
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 19
1 1.00 1.00 1.00 13
2 1.00 1.00 1.00 13
accuracy 1.00 45
macro avg 1.00 1.00 1.00 45
weighted avg 1.00 1.00 1.00 45
Confusion Matrix:
[[19 0 0]
[ 0 13 0]
[ 0 0 13]]
In [7]:
# Visualize the confusion matrix as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

# Explicit figure/axes interface: create the canvas, then draw onto the axes.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,          # write the count inside each cell
    fmt='d',             # format counts as integers
    cmap='Blues',        # darker blue = larger count
    xticklabels=label_encoder.classes_,  # predicted species names
    yticklabels=label_encoder.classes_,  # actual species names
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()
Explanation of the Confusion Matrix Plot Code
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
xticklabels=label_encoder.classes_,
yticklabels=label_encoder.classes_)
This line of code draws a heatmap (colored grid) to show the confusion matrix in a way that's easy to read.
Here’s what each part means:
Here is what each part means:

- `sns.heatmap(...)` → This function creates the heatmap using Seaborn, a library for making nice-looking plots.
- `cm` → This is the confusion matrix itself. It contains numbers showing how many times the model got something right or wrong.
- `annot=True` → This tells the heatmap to show the actual numbers inside each box.
- `fmt='d'` → This means the numbers should be shown as whole numbers (integers).
- `cmap='Blues'` → This sets the color theme to shades of blue. Darker blue means bigger numbers.
- `xticklabels=label_encoder.classes_` → This puts the actual class names (Setosa, Versicolor, Virginica) on the x-axis (predicted values).
- `yticklabels=label_encoder.classes_` → This puts the same class names on the y-axis (actual values).
In short, this heatmap makes it easy to see how well the model is doing. The diagonal boxes show correct predictions. Off-diagonal boxes show mistakes.