I am training a model that detects distresses on roads for my internship project. I have multiple images that I labelled myself and have written code for this, but it does not work properly, so I would like guidance on it. #1224
Description
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Embedding, LSTM, TimeDistributed, RepeatVector
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.patches as patches
# Step 1: Load the data from the directories
image_dir = 'D:/Image Classification using CNN/1'
text_dir = 'D:/Image Classification using CNN/1-Labled'
image_files = sorted([f for f in os.listdir(image_dir) if f.endswith('.jpg')])
image_paths = [os.path.join(image_dir, f) for f in image_files]
text_paths = [os.path.join(text_dir, f.replace('.jpg', '.txt')) for f in image_files]
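# Optional sanity check (not in the original code): confirm every image has a
# matching label file before loading, so missing pairs surface early instead
# of as per-file load errors later.
missing = [p for p in text_paths if not os.path.exists(p)]
if missing:
    print(f"Warning: {len(missing)} label files missing, e.g. {missing[:3]}")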
images = []
labels = []
# Function to load labels from a text file
def load_labels(label_path):
    with open(label_path, 'r') as file:
        return [float(x) for x in file.read().strip().split()]
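# For reference, a YOLO-style label line such as "0 0.512 0.340 0.120 0.080"
# (class id plus normalized box coordinates; the exact format is an assumption,
# since the issue does not show a sample file) would be parsed by load_labels
# into [0.0, 0.512, 0.34, 0.12, 0.08].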
target_size = (150, 150) # Define your target size here
for img_path, txt_path in zip(image_paths, text_paths):
    try:
        img = load_img(img_path, target_size=target_size)  # Load and resize to target_size
        img = img_to_array(img)
        img = img / 255.0  # Normalize to [0, 1]
        lbl = load_labels(txt_path)
        images.append(img)
        labels.append(lbl)
    except Exception as e:
        print(f"Error loading {img_path} or {txt_path}: {e}")
# Step 2: Pad the labels to the maximum length
max_label_length = max(len(label) for label in labels)
labels = pad_sequences(labels, maxlen=max_label_length, padding='post', dtype='float32')
# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)
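# Inspect the array shapes to confirm padding produced a rectangular label
# matrix: images should be (N, 150, 150, 3) and labels (N, max_label_length).
print("images:", images.shape, "labels:", labels.shape)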
# Split the data into training and validation sets (also split the indices,
# so the original file names can be recovered after shuffling)
indices = np.arange(len(images))
x_train, x_val, y_train, y_val, idx_train, idx_val = train_test_split(
    images, labels, indices, test_size=0.2, random_state=42)
# Step 3: Build the model
# Image model
image_input = Input(shape=(target_size[0], target_size[1], 3)) # Use target_size dimensions
x = Conv2D(32, (3, 3), activation='relu')(image_input)
x = MaxPooling2D(2, 2)(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(2, 2)(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D(2, 2)(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
image_features = Dense(256, activation='relu')(x)
# Label model
label_input = Input(shape=(max_label_length,))
embedded_labels = Embedding(input_dim=1000, output_dim=256, input_length=max_label_length)(label_input)
lstm_out = LSTM(256, return_sequences=True)(embedded_labels)
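# Note: Embedding expects non-negative integer token ids, but the padded
# labels here are floats in [0, 1]; Keras casts them to integers, so nearly
# every value becomes 0 and this branch carries almost no label information
# as written.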
# Repeat image features to match the label sequence length
repeated_image_features = RepeatVector(max_label_length)(image_features)
# Combine the repeated image features with the LSTM output
combined = tf.keras.layers.Concatenate()([repeated_image_features, lstm_out])
output = TimeDistributed(Dense(1, activation='sigmoid'))(combined) # Adjust output for multi-label
model = Model(inputs=[image_input, label_input], outputs=output)
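# Printing a summary is a cheap way to verify the two branches were wired
# together as intended and to see the parameter count per layer.
model.summary()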
# Step 4: Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',  # Use binary cross-entropy for multi-label classification
              metrics=['accuracy', 'binary_accuracy'])  # Add binary_accuracy for evaluation
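# Caution: the ground-truth labels are fed into the model as an input and
# also used as the training target, so the label branch can learn to copy
# them through and the reported accuracy says little about what the CNN
# learned from the images. A minimal image-only sketch (an assumed
# alternative, not part of the original code) drops the label branch:
image_only_output = Dense(max_label_length, activation='sigmoid')(image_features)
image_only_model = Model(inputs=image_input, outputs=image_only_output)
image_only_model.compile(optimizer='adam', loss='binary_crossentropy',
                         metrics=['binary_accuracy'])
# If the label values are box coordinates rather than 0/1 flags, a linear
# activation with an 'mse' loss would be the more natural fit.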
# Step 5: Train the model
history = model.fit(
    [x_train, y_train], y_train,
    epochs=20,
    batch_size=20,
    validation_data=([x_val, y_val], y_val)
)
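# Optional: plot the loss curves to see whether training and validation loss
# diverge (overfitting) or both plateau (underfitting or unsuitable targets).
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.legend()
plt.show()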
# Step 6: Evaluate the model
loss, accuracy, binary_accuracy = model.evaluate([x_val, y_val], y_val)
print(f"Validation accuracy: {accuracy * 100:.2f}%")
print(f"Validation binary accuracy: {binary_accuracy * 100:.2f}%")
# Step 7: Visualize predictions with annotations
def visualize_predictions(images, labels, predictions, file_names, target_size):
    num_images = len(images)
    for i in range(num_images):
        fig, ax = plt.subplots(figsize=(images[i].shape[1] / 100, images[i].shape[0] / 100))  # Use image dimensions for figsize
        ax.imshow(images[i])
        ax.set_title(f"Image: {file_names[i]}")
        # Draw rectangles based on predictions
        for j, pred in enumerate(predictions[i]):
            if pred > 0.5:  # Adjust threshold as needed
                # Calculate box dimensions based on label position and target_size
                box_x = target_size[1] * (j + 1) / (max_label_length + 1)
                box_y = target_size[0] * 0.05
                box_width = target_size[1] * 0.2
                box_height = target_size[0] * 0.1
                rect = patches.Rectangle((box_x, box_y), box_width, box_height, linewidth=1, edgecolor='r', facecolor='none')
                ax.add_patch(rect)
        ax.axis('off')
        plt.show()
# Predict on validation data
predictions = model.predict([x_val, y_val])
# Convert predictions to binary format for visualization
predictions_binary = (predictions > 0.5).astype(int)
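# Optional: inspect a few raw sigmoid outputs to judge whether the 0.5
# threshold used above is reasonable for this data.
print(predictions[0].ravel()[:10])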
# Display some predictions with annotations (use idx_val so the file names
# match the shuffled validation split)
num_visualize = min(5, len(x_val))  # Visualize up to 5 images, adjust as needed
val_file_names = [image_files[k] for k in idx_val[:num_visualize]]
visualize_predictions(x_val[:num_visualize], y_val[:num_visualize], predictions_binary[:num_visualize], val_file_names, target_size)
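# Optionally persist the trained model for later inference; the file name
# here is just a placeholder, not from the original code.
model.save('road_distress_model.h5')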