# Image_segmentation_using_unet/evaluate.py at main · rishika0212/Image_segmentation_using_unet · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import torch
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms
from data.cityscapes_dataset import CityscapesDataset
from models.unet import UNet
from config import *
from utils import show_prediction
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
import matplotlib.pyplot as plt
import os
from scipy import ndimage

# Device configuration: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Image pipeline - MUST match the transforms used during training:
# resize to IMAGE_SIZE, convert to a tensor, then normalise each channel
# with the fixed mean/std below.
transform = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Label pipeline: nearest-neighbour resizing so that class ids are never
# interpolated into values that do not correspond to a real class.
target_transform = transforms.Compose(
    [
        transforms.Resize(
            IMAGE_SIZE,
            interpolation=transforms.InterpolationMode.NEAREST,
        ),
        transforms.PILToTensor(),
    ]
)

# Validation split, served one image per batch.
val_dataset = CityscapesDataset(
    DATASET_PATH,
    "val",
    transform,
    target_transform,
)
val_loader = DataLoader(val_dataset, batch_size=1)

# Load model
model = UNet(NUM_CLASSES).to(device)

# Checkpoints are tried in order of preference: the best-validation
# checkpoint first, then the final-epoch checkpoint as a fallback.
checkpoint = None
loaded_label = None
for ckpt_path, ckpt_label in (
    ("unet_agritech_best.pth", "best"),
    ("unet_agritech.pth", "final"),
):
    try:
        # NOTE(security): weights_only=False unpickles arbitrary Python
        # objects. Only load checkpoint files that come from a trusted source.
        candidate = torch.load(ckpt_path, map_location=device, weights_only=False)
        model.load_state_dict(candidate['model_state_dict'])
    except Exception as err:
        # Best-effort fallback is intentional, but report why the attempt
        # failed. `Exception` (unlike a bare `except:`) lets KeyboardInterrupt
        # and SystemExit propagate.
        print(f"Could not load {ckpt_label} checkpoint '{ckpt_path}': {err}")
        continue
    checkpoint, loaded_label = candidate, ckpt_label
    break

if checkpoint is None:
    print("Error: Could not load any model checkpoint")
    print("Please run training first")
    raise SystemExit(1)

if loaded_label == "best":
    # The IoU entry is optional. Format it defensively so that a missing or
    # non-numeric value cannot discard a checkpoint that loaded successfully.
    best_val_iou = checkpoint.get('best_val_iou')
    try:
        iou_text = f"{best_val_iou:.4f}"
    except (TypeError, ValueError):
        iou_text = "N/A"
    print(f"Loaded best model checkpoint with IoU: {iou_text}")
else:
    print("Loaded final model checkpoint")

# Switch to inference mode before evaluation.
model.eval()

# Metrics containers
# Per-class running sums; index i holds the accumulated score of class id i.
class_iou, class_precision, class_recall, class_f1 = (
    np.zeros(NUM_CLASSES) for _ in range(4)
)

# Scalar running sums, accumulated once per evaluated image.
total_mse = total_ssim = total_psnr = 0
pixel_accuracy_total = 0
total_samples = 0

# Accumulated NUM_CLASSES x NUM_CLASSES confusion counts.
conf_matrix = np.zeros((NUM_CLASSES, NUM_CLASSES), dtype=np.int64)

def calculate_iou(pred_mask, gt_mask, n_classes, ignore_index=255):
    """Return the per-class intersection-over-union of two label masks.

    Args:
        pred_mask: Array of predicted class ids.
        gt_mask: Array of ground-truth class ids, same shape as ``pred_mask``.
        n_classes: Number of classes; IoU is computed for ids
            ``0 .. n_classes - 1``.
        ignore_index: Ground-truth value marking unlabelled pixels. Those
            pixels are excluded from both intersection and union, matching
            ``calculate_pixel_accuracy``. Pass ``None`` to count every pixel.

    Returns:
        A float array of length ``n_classes``. A class that occurs in neither
        mask (empty union) is reported as ``1.0``.
    """
    pred_mask = np.asarray(pred_mask)
    gt_mask = np.asarray(gt_mask)

    if ignore_index is not None:
        # Drop unlabelled pixels up front so a prediction made there is not
        # counted as a false positive for whatever class was predicted.
        labelled = gt_mask != ignore_index
        pred_mask = pred_mask[labelled]
        gt_mask = gt_mask[labelled]

    ious = np.empty(n_classes, dtype=np.float64)
    for cls in range(n_classes):
        pred_inds = pred_mask == cls
        gt_inds = gt_mask == cls
        union = np.logical_or(pred_inds, gt_inds).sum()
        if union == 0:
            # Class absent from both masks: nothing to get wrong.
            ious[cls] = 1.0
        else:
            ious[cls] = np.logical_and(pred_inds, gt_inds).sum() / union
    return ious

def calculate_pixel_accuracy(pred_mask, gt_mask):
    """Return the fraction of labelled pixels whose prediction is correct.

    Pixels whose ground-truth value is 255 are treated as unlabelled and are
    left out of both the numerator and the denominator. When no labelled
    pixel exists the result is ``0.0``.
    """
    labelled = gt_mask != 255
    n_labelled = labelled.sum()
    if n_labelled == 0:
        return 0.0

    hits = pred_mask[labelled] == gt_mask[labelled]
    return hits.sum() / n_labelled

def post_process_mask(pred_mask, min_size=50, fill_value=0):
    """Remove small isolated regions from a predicted label mask.

    For every class id in ``pred_mask`` the connected components of that
    class are labelled with ``scipy.ndimage.label`` (default connectivity),
    and every component with fewer than ``min_size`` pixels is overwritten
    with ``fill_value``.

    Args:
        pred_mask: Integer array of predicted class ids. It is not modified.
        min_size: Components with fewer pixels than this are removed.
        fill_value: Class id written over removed components (default 0).

    Returns:
        A cleaned copy of ``pred_mask``.
    """
    processed_mask = pred_mask.copy()

    for class_id in np.unique(pred_mask):
        # 255 is the ignore index; components of the fill class would only be
        # overwritten with their own value, so both are skipped.
        if class_id == 255 or class_id == fill_value:
            continue

        labeled_mask, num_features = ndimage.label(pred_mask == class_id)
        if num_features == 0:
            continue

        # One pass over the label image gives the size of every component,
        # instead of re-scanning the whole image once per component.
        component_sizes = np.bincount(
            labeled_mask.ravel(), minlength=num_features + 1
        )
        is_small = component_sizes < min_size
        is_small[0] = False  # label 0 marks pixels outside this class

        if is_small.any():
            # Indexing the per-label flags with the label image yields a
            # per-pixel boolean mask of all small components at once.
            processed_mask[is_small[labeled_mask]] = fill_value

    return processed_mask

print("Evaluating model on validation set...")

# Number of evaluated images in which each class occurred (in the prediction
# or in the ground truth). Per-class scores are only accumulated for those
# images, so they must be averaged over this count rather than over all images.
class_counts = np.zeros(NUM_CLASSES, dtype=np.int64)

# SSIM / PSNR can be undefined for individual images, so they keep their own
# sample counters instead of sharing total_samples.
ssim_samples = 0
psnr_samples = 0

# Scale used to map class ids into [0, 1]; guarded so that a single-class
# setup does not divide by zero.
label_scale = float(max(NUM_CLASSES - 1, 1))

with torch.no_grad():
    for idx, (img, mask) in enumerate(val_loader):
        img, mask = img.to(device), mask.squeeze(1).to(device)

        # Test-time augmentation: average the outputs for the image and its
        # horizontal mirror (the mirrored output is flipped back first).
        output = model(img)
        output_flip = torch.flip(model(torch.flip(img, dims=[3])), dims=[3])
        output = (output + output_flip) / 2
        preds = torch.argmax(output, dim=1)

        # The loader is built with batch_size=1, so sample 0 is the whole
        # batch. post_process_mask returns a cleaned copy of the prediction.
        pred_img = post_process_mask(preds[0].cpu().numpy())
        mask_img = mask[0].cpu().numpy()

        # Pixels labelled 255 are unlabelled and excluded from every metric.
        valid = mask_img != 255
        if not valid.any():
            continue

        pred_valid = pred_img[valid]
        mask_valid = mask_img[valid]

        unique_classes = np.unique(np.concatenate([pred_valid, mask_valid]))
        unique_classes = unique_classes[unique_classes < NUM_CLASSES]

        # IoU on labelled pixels only, accumulated only for classes that
        # actually occur in this image (absent classes carry no information).
        iou = calculate_iou(pred_valid, mask_valid, NUM_CLASSES)
        pixel_accuracy_total += calculate_pixel_accuracy(pred_img, mask_img)

        if unique_classes.size > 0:
            precision, recall, f1, _ = precision_recall_fscore_support(
                mask_valid, pred_valid, labels=unique_classes, average=None, zero_division=0
            )
            class_iou[unique_classes] += iou[unique_classes]
            class_precision[unique_classes] += precision
            class_recall[unique_classes] += recall
            class_f1[unique_classes] += f1
            class_counts[unique_classes] += 1

        conf_matrix += confusion_matrix(mask_valid, pred_valid, labels=np.arange(NUM_CLASSES))

        # MSE between class ids, restricted to labelled pixels so the 255
        # marker does not show up as a huge error.
        diff = pred_valid.astype(np.float32) - mask_valid.astype(np.float32)
        total_mse += float(np.mean(diff ** 2))

        pred_norm = pred_img.astype(np.float32) / label_scale
        mask_norm = mask_img.astype(np.float32) / label_scale
        # SSIM / PSNR need full 2-D images; unlabelled pixels take the
        # predicted value so that they contribute no error.
        mask_norm[~valid] = pred_norm[~valid]

        try:
            sample_ssim = ssim(pred_norm, mask_norm, data_range=1.0)
            sample_psnr = psnr(mask_norm, pred_norm, data_range=1.0)
        except ValueError as err:
            # e.g. image smaller than the SSIM window; report, don't hide.
            print(f"Skipping SSIM/PSNR for sample {idx}: {err}")
        else:
            total_ssim += sample_ssim
            ssim_samples += 1
            # PSNR is infinite for a pixel-perfect prediction; a single such
            # image would turn the mean into inf, so it is left out.
            if np.isfinite(sample_psnr):
                total_psnr += sample_psnr
                psnr_samples += 1

        total_samples += 1
        if idx < 3:
            # Show the post-processed prediction, i.e. the one that was scored,
            # regardless of whether inference ran on CPU or GPU.
            show_prediction(img.cpu()[0], torch.from_numpy(pred_img))

if total_samples == 0:
    print("Error: no validation images with labelled pixels were evaluated")
    raise SystemExit(1)

# Final metrics
# Per-class means over the images in which the class occurred; classes that
# never occurred keep a score of 0 and are left out of the overall averages.
seen_classes = class_counts > 0
per_class_divisor = np.maximum(class_counts, 1)
mean_iou = class_iou / per_class_divisor
mean_precision = class_precision / per_class_divisor
mean_recall = class_recall / per_class_divisor
mean_f1 = class_f1 / per_class_divisor
mean_mse = total_mse / total_samples
mean_ssim = total_ssim / ssim_samples if ssim_samples else float("nan")
mean_psnr = total_psnr / psnr_samples if psnr_samples else float("nan")
mean_pixel_accuracy = pixel_accuracy_total / total_samples

any_seen = bool(seen_classes.any())
overall_iou = float(mean_iou[seen_classes].mean()) if any_seen else 0.0
overall_precision = float(mean_precision[seen_classes].mean()) if any_seen else 0.0
overall_recall = float(mean_recall[seen_classes].mean()) if any_seen else 0.0
overall_f1 = float(mean_f1[seen_classes].mean()) if any_seen else 0.0

# Display results
print("\nFinal Evaluation Results:")
print(f"Mean IoU (mIoU):                    {overall_iou:.4f}")
# NOTE(review): this line reports mean precision, not a true mean average
# precision (no score thresholds are swept). The label is kept so existing
# log consumers do not break; consider renaming or removing it.
print(f"Mean Average Precision (mAP):       {overall_precision:.4f}")
print(f"Mean Precision:                     {overall_precision:.4f}")
print(f"Mean Recall:                        {overall_recall:.4f}")
print(f"Mean F1 Score:                      {overall_f1:.4f}")
print(f"Pixel Accuracy:                     {mean_pixel_accuracy:.4f}")
print(f"Mean Squared Error (MSE):           {mean_mse:.4f}")
print(f"Structural Similarity Index (SSIM): {mean_ssim:.4f}")
print(f"Peak Signal-to-Noise Ratio (PSNR):  {mean_psnr:.4f}")