-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluate.py
More file actions
200 lines (167 loc) · 7.44 KB
/
evaluate.py
File metadata and controls
200 lines (167 loc) · 7.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import torch
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms
from data.cityscapes_dataset import CityscapesDataset
from models.unet import UNet
from config import *
from utils import show_prediction
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
import matplotlib.pyplot as plt
import os
from scipy import ndimage
# Device configuration: run on the GPU when CUDA is available, else on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Transforms - MUST match training transforms
# Input images: resize, convert to a tensor, then normalise each channel.
transform = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Label masks: nearest-neighbour resize so class ids are never blended,
# then conversion to a tensor without rescaling the values.
target_transform = transforms.Compose(
    [
        transforms.Resize(
            IMAGE_SIZE,
            interpolation=transforms.InterpolationMode.NEAREST,
        ),
        transforms.PILToTensor(),
    ]
)

# Dataset and Loader
# Validation split, served one image per batch.
val_dataset = CityscapesDataset(DATASET_PATH, "val", transform, target_transform)
val_loader = DataLoader(dataset=val_dataset, batch_size=1)
# Load model
model = UNet(NUM_CLASSES).to(device)

# Candidate checkpoints in order of preference: the "best" checkpoint first,
# then the final-epoch weights as a fallback.
checkpoint = None
loaded_kind = None
for ckpt_path, ckpt_kind in (
    ("unet_agritech_best.pth", "best"),
    ("unet_agritech.pth", "final"),
):
    try:
        # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
        # objects; only point this at checkpoint files you trust.
        candidate = torch.load(ckpt_path, map_location=device, weights_only=False)
        model.load_state_dict(candidate['model_state_dict'])
    except Exception as err:
        # Report why this candidate was rejected instead of hiding it, then
        # move on to the next one.
        print(f"Could not load {ckpt_path}: {err}")
        continue
    checkpoint, loaded_kind = candidate, ckpt_kind
    break

if checkpoint is None:
    print("Error: Could not load any model checkpoint")
    print("Please run training first")
    raise SystemExit(1)

if loaded_kind == "best":
    # Format the stored IoU outside the loading try-block: a missing or
    # non-numeric value must not be mistaken for a failed checkpoint load.
    best_iou = checkpoint.get('best_val_iou')
    try:
        iou_text = f"{float(best_iou):.4f}"
    except (TypeError, ValueError):
        iou_text = "N/A"
    print(f"Loaded best model checkpoint with IoU: {iou_text}")
else:
    print("Loaded final model checkpoint")

# Switch the model to evaluation mode before running inference.
model.eval()
# Metrics containers
# Per-class running sums, one slot per class id.
class_iou, class_precision, class_recall, class_f1 = (
    np.zeros(NUM_CLASSES) for _ in range(4)
)
# Pixel counts accumulated from confusion_matrix(ground truth, prediction).
conf_matrix = np.zeros((NUM_CLASSES, NUM_CLASSES), dtype=np.int64)
# Scalar running sums over the evaluated images.
total_mse = total_ssim = total_psnr = 0
pixel_accuracy_total = 0
# Number of images that contributed to the sums above.
total_samples = 0
def calculate_iou(pred_mask, gt_mask, n_classes, ignore_index=255):
    """Return the per-class intersection-over-union of two label masks.

    Args:
        pred_mask: Array of predicted class ids.
        gt_mask: Array of ground-truth class ids, same shape as ``pred_mask``.
        n_classes: Number of classes; ids ``0 .. n_classes - 1`` are scored.
        ignore_index: Ground-truth label marking unlabelled pixels. Those
            pixels are excluded from both intersection and union. Pass
            ``None`` to score every pixel.

    Returns:
        A float array of length ``n_classes``. A class that occurs in neither
        mask (within the scored pixels) gets an IoU of 1.0.
    """
    pred_mask = np.asarray(pred_mask)
    gt_mask = np.asarray(gt_mask)

    if ignore_index is None:
        valid = np.ones(gt_mask.shape, dtype=bool)
    else:
        # Predictions on unlabelled pixels must not count against a class.
        valid = gt_mask != ignore_index

    ious = np.empty(n_classes, dtype=np.float64)
    for cls in range(n_classes):
        pred_inds = (pred_mask == cls) & valid
        gt_inds = (gt_mask == cls) & valid
        union = np.logical_or(pred_inds, gt_inds).sum()
        if union == 0:
            # Class absent from both masks: nothing to get wrong.
            ious[cls] = 1.0
        else:
            ious[cls] = np.logical_and(pred_inds, gt_inds).sum() / union
    return ious
def calculate_pixel_accuracy(pred_mask, gt_mask, ignore_index=255):
    """Return the fraction of labelled pixels whose prediction is correct.

    Args:
        pred_mask: Array of predicted class ids.
        gt_mask: Array of ground-truth class ids, same shape as ``pred_mask``.
        ignore_index: Ground-truth label marking pixels to leave out of the
            score.

    Returns:
        Accuracy as a float in ``[0, 1]``; 0.0 when no pixel is labelled.
    """
    pred_mask = np.asarray(pred_mask)
    gt_mask = np.asarray(gt_mask)

    valid_mask = gt_mask != ignore_index
    n_valid = int(valid_mask.sum())
    if n_valid == 0:
        return 0.0

    n_correct = int(np.count_nonzero((pred_mask == gt_mask) & valid_mask))
    return n_correct / n_valid
def post_process_mask(pred_mask, min_size=50, background=0, ignore_index=255):
    """Remove small isolated regions from a predicted label mask.

    For every class present in ``pred_mask``, connected components (as found
    by ``scipy.ndimage.label`` with its default connectivity) that contain
    fewer than ``min_size`` pixels are relabelled as ``background``.

    Args:
        pred_mask: Integer NumPy array of class ids. It is not modified.
        min_size: Components strictly smaller than this are removed.
        background: Class id written into removed components.
        ignore_index: Class id that is left untouched.

    Returns:
        A copy of ``pred_mask`` with the small components relabelled.
    """
    processed_mask = pred_mask.copy()

    for class_id in np.unique(pred_mask):
        # Relabelling background regions as background would change nothing.
        if class_id == ignore_index or class_id == background:
            continue

        labeled_mask, num_features = ndimage.label(pred_mask == class_id)
        if num_features == 0:
            continue

        # One pass gives the size of every component; index 0 is "not this
        # class" and must never be selected for removal.
        component_sizes = np.bincount(
            labeled_mask.ravel(), minlength=num_features + 1
        )
        too_small = component_sizes < min_size
        too_small[0] = False

        # Map the per-component verdict back onto pixels.
        processed_mask[too_small[labeled_mask]] = background

    return processed_mask
print("Evaluating model on validation set...")

# Loop invariants.
all_labels = np.arange(NUM_CLASSES)
# Divisor that maps class ids into [0, 1] for SSIM/PSNR; guarded so that a
# single-class configuration does not divide by zero.
label_scale = max(NUM_CLASSES - 1, 1)

with torch.no_grad():
    for idx, (img, mask) in enumerate(val_loader):
        # The mask is only needed as a NumPy array, so it stays off the device.
        mask_np = mask.squeeze(1).cpu().numpy()
        mask_flat = mask_np.flatten()
        valid_pixels = mask_flat != 255
        # Nothing to score on a fully unlabelled image; skip it before paying
        # for the two forward passes.
        if valid_pixels.sum() == 0:
            continue

        img = img.to(device)

        # Test-time augmentation: average the outputs for the image and its
        # horizontal mirror (the mirrored output is flipped back first).
        output = model(img)
        img_flip = torch.flip(img, dims=[3])
        output_flip = torch.flip(model(img_flip), dims=[3])
        output = (output + output_flip) / 2

        preds = torch.argmax(output, dim=1)
        pred_np = preds.cpu().numpy()

        # Apply post-processing to improve mask quality.
        # NOTE: the loader yields one image per batch, so only index 0 exists.
        pred_np[0] = post_process_mask(pred_np[0])

        pred_valid = pred_np.flatten()[valid_pixels]
        mask_valid = mask_flat[valid_pixels]

        class_iou += calculate_iou(pred_np[0], mask_np[0], NUM_CLASSES)
        pixel_accuracy_total += calculate_pixel_accuracy(pred_np[0], mask_np[0])

        # Per-class precision/recall/F1, restricted to the classes that occur
        # in this image (in either the prediction or the ground truth).
        unique_classes = np.unique(np.concatenate([pred_valid, mask_valid]))
        unique_classes = unique_classes[unique_classes < NUM_CLASSES]
        if len(unique_classes) > 0:
            precision, recall, f1, _ = precision_recall_fscore_support(
                mask_valid, pred_valid, labels=unique_classes, average=None, zero_division=0
            )
            for i, cls in enumerate(unique_classes):
                class_precision[cls] += precision[i]
                class_recall[cls] += recall[i]
                class_f1[cls] += f1[i]

        conf_matrix += confusion_matrix(mask_valid, pred_valid, labels=all_labels)

        # MSE over labelled pixels only; the ignore label (255) is not a class
        # value and would otherwise dominate the error.
        mse = np.mean(
            (pred_valid.astype(np.float32) - mask_valid.astype(np.float32)) ** 2
        )
        total_mse += mse

        # SSIM/PSNR need full 2-D images, so unlabelled ground-truth pixels
        # are filled with the prediction and therefore contribute no error.
        gt_filled = np.where(mask_np[0] == 255, pred_np[0], mask_np[0])
        pred_norm = pred_np[0].astype(np.float32) / label_scale
        mask_norm = gt_filled.astype(np.float32) / label_scale
        try:
            total_ssim += ssim(pred_norm, mask_norm, data_range=1.0)
            total_psnr += psnr(mask_norm, pred_norm, data_range=1.0)
        except ValueError as err:
            # Best effort: keep evaluating, but say that this sample is
            # missing from the SSIM/PSNR sums.
            print(f"Warning: SSIM/PSNR skipped for sample {idx}: {err}")

        total_samples += 1

        if idx < 3:
            # Show the post-processed mask that was actually scored. On CPU
            # `preds` shares memory with `pred_np`, on GPU it does not, so
            # passing `preds` would display different things per device.
            show_prediction(img.cpu()[0], torch.from_numpy(pred_np[0]))
# Final metrics
if total_samples == 0:
    # Without this guard the averages below divide by zero.
    print("Error: no validation samples with labelled pixels were evaluated")
    raise SystemExit(1)

# Per-class and scalar averages over the evaluated images.
mean_iou = class_iou / total_samples
mean_precision = class_precision / total_samples
mean_recall = class_recall / total_samples
mean_f1 = class_f1 / total_samples
mean_mse = total_mse / total_samples
mean_ssim = total_ssim / total_samples
mean_psnr = total_psnr / total_samples
mean_pixel_accuracy = pixel_accuracy_total / total_samples

# A class takes part in the overall scores when it occurred at least once in
# the ground truth or in the predictions. Filtering on "score > 0" instead
# would drop classes the model fails on completely and would keep
# never-observed classes, whose per-image IoU is 1.0 by convention.
observed_classes = (conf_matrix.sum(axis=0) + conf_matrix.sum(axis=1)) > 0


def _mean_over_observed(per_class_values):
    """Average per-class scores over the observed classes (0.0 if none)."""
    selected = per_class_values[observed_classes]
    return float(selected.mean()) if selected.size else 0.0


overall_iou = _mean_over_observed(mean_iou)
overall_precision = _mean_over_observed(mean_precision)
overall_recall = _mean_over_observed(mean_recall)
overall_f1 = _mean_over_observed(mean_f1)

# Display results
print("\nFinal Evaluation Results:")
print(f"Mean IoU (mIoU): {overall_iou:.4f}")
# NOTE(review): this line reports the mean per-class precision; no
# average-precision computation is performed anywhere in this script.
print(f"Mean Average Precision (mAP): {overall_precision:.4f}")
print(f"Mean Precision: {overall_precision:.4f}")
print(f"Mean Recall: {overall_recall:.4f}")
print(f"Mean F1 Score: {overall_f1:.4f}")
print(f"Pixel Accuracy: {mean_pixel_accuracy:.4f}")
print(f"Mean Squared Error (MSE): {mean_mse:.4f}")
print(f"Structural Similarity Index (SSIM): {mean_ssim:.4f}")
print(f"Peak Signal-to-Noise Ratio (PSNR): {mean_psnr:.4f}")