In [5]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
In [6]:
# Load the dataset
df = pd.read_csv("Iris.csv")  # Make sure Iris.csv is in the same folder

# Drop the 'Id' column (a row identifier; not useful for prediction)
df = df.drop(columns=['Id'])

# Convert the 'Species' column (labels) into numbers using LabelEncoder
label_encoder = LabelEncoder()
df['Species'] = label_encoder.fit_transform(df['Species'])

# Separate features (X) and target (y)
X = df.drop(columns=['Species'])  # Input features
y = df['Species']                 # Target label

# Split the data into training and testing parts.
# test_size=0.3 -> 70% training, 30% testing (45 of the 150 rows go to the test set).
# random_state=42 makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale the features using StandardScaler (important for KNN, which is distance-based).
# Fit on the training data only, then apply the same transform to the test data
# to avoid leaking test-set statistics into the model.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and train the KNN model with k=3 neighbors
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predict the labels for the test set
y_pred = knn.predict(X_test)

# Evaluate the model: overall accuracy, per-class precision/recall/F1, and the confusion matrix
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
In [7]:
# Visualize the confusion matrix as an annotated heatmap:
# the diagonal holds correct predictions, off-diagonal cells hold mistakes.
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,                              # write the count inside each cell
    fmt='d',                                 # format counts as integers
    cmap='Blues',
    xticklabels=label_encoder.classes_,      # original species names on the axes
    yticklabels=label_encoder.classes_,
    ax=ax,
)

ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()
No description has been provided for this image

Explanation of the Confusion Matrix Plot Code

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)

This line of code draws a heatmap (colored grid) to show the confusion matrix in a way that's easy to read.

Here’s what each part means:

  • sns.heatmap(...) → This function creates the heatmap using Seaborn, a statistical data visualization library built on top of Matplotlib.
  • cm → This is the confusion matrix itself. It contains numbers showing how many times the model got something right or wrong.
  • annot=True → This tells the heatmap to show the actual numbers inside each box.
  • fmt='d' → This means the numbers should be shown as whole numbers (integers).
  • cmap='Blues' → This sets the color theme to different shades of blue. Darker blue means bigger numbers.
  • xticklabels=label_encoder.classes_ → This puts the actual class names (like Setosa, Versicolor, Virginica) on the x-axis (predicted values).
  • yticklabels=label_encoder.classes_ → This puts the same class names on the y-axis (actual values).

In short, this heatmap makes it easy to see how well the model is doing. The diagonal boxes show correct predictions. Off-diagonal boxes show mistakes.