I am training a model that detects distresses on roads for my internship project. I have multiple images that I labelled myself and have written code for this, but it does not work properly, so I would like guidance on it. #1224
Description
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Embedding, LSTM, TimeDistributed, RepeatVector
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.patches as patches
# Step 1: Load the data from the directories
image_dir = 'D:/Image Classification using CNN/1'
text_dir = 'D:/Image Classification using CNN/1-Labled'
image_files = sorted([f for f in os.listdir(image_dir) if f.endswith('.jpg')])
image_paths = [os.path.join(image_dir, f) for f in image_files]
text_paths = [os.path.join(text_dir, f.replace('.jpg', '.txt')) for f in image_files]
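# Optional sanity check (not in the original code): confirm every image has a
# matching label file before loading, so missing pairs surface early instead
# of as per-file load errors later.
missing = [p for p in text_paths if not os.path.exists(p)]
if missing:
    print(f"Warning: {len(missing)} label files missing, e.g. {missing[:3]}")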
images = []
labels = []
# Function to load labels from a text file
def load_labels(label_path):
    with open(label_path, 'r') as file:
        return [float(x) for x in file.read().strip().split()]
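# For reference, a YOLO-style label line such as "0 0.512 0.340 0.120 0.080"
# (class id plus normalized box coordinates; the exact format is an assumption,
# since the issue does not show a sample file) would be parsed by load_labels
# into [0.0, 0.512, 0.34, 0.12, 0.08].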
target_size = (150, 150) # Define your target size here
for img_path, txt_path in zip(image_paths, text_paths):
    try:
        img = load_img(img_path, target_size=target_size)  # Load and resize to target_size
        img = img_to_array(img)
        img = img / 255.0  # Normalize to [0, 1]
        lbl = load_labels(txt_path)
        images.append(img)
        labels.append(lbl)
    except Exception as e:
        print(f"Error loading {img_path} or {txt_path}: {e}")
# Step 2: Pad the labels to the maximum length
max_label_length = max(len(label) for label in labels)
labels = pad_sequences(labels, maxlen=max_label_length, padding='post', dtype='float32')
# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)
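# Inspect the array shapes to confirm padding produced a rectangular label
# matrix: images should be (N, 150, 150, 3) and labels (N, max_label_length).
print("images:", images.shape, "labels:", labels.shape)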
# Split the data into training and validation sets (also split the indices,
# so the original file names can be recovered after shuffling)
indices = np.arange(len(images))
x_train, x_val, y_train, y_val, idx_train, idx_val = train_test_split(
    images, labels, indices, test_size=0.2, random_state=42)
# Step 3: Build the model
# Image model
image_input = Input(shape=(target_size[0], target_size[1], 3)) # Use target_size dimensions
x = Conv2D(32, (3, 3), activation='relu')(image_input)
x = MaxPooling2D(2, 2)(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(2, 2)(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D(2, 2)(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
image_features = Dense(256, activation='relu')(x)
# Label model
label_input = Input(shape=(max_label_length,))
embedded_labels = Embedding(input_dim=1000, output_dim=256, input_length=max_label_length)(label_input)
lstm_out = LSTM(256, return_sequences=True)(embedded_labels)
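# Note: Embedding expects non-negative integer token ids, but the padded
# labels here are floats in [0, 1]; Keras casts them to integers, so nearly
# every value becomes 0 and this branch carries almost no label information
# as written.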
# Repeat image features to match the label sequence length
repeated_image_features = RepeatVector(max_label_length)(image_features)
# Combine the repeated image features with the LSTM output
combined = tf.keras.layers.Concatenate()([repeated_image_features, lstm_out])
output = TimeDistributed(Dense(1, activation='sigmoid'))(combined) # Adjust output for multi-label
model = Model(inputs=[image_input, label_input], outputs=output)
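# Printing a summary is a cheap way to verify the two branches were wired
# together as intended and to see the parameter count per layer.
model.summary()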
# Step 4: Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',  # Use binary cross-entropy for multi-label classification
              metrics=['accuracy', 'binary_accuracy'])  # Add binary_accuracy for evaluation
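# Caution: the ground-truth labels are fed into the model as an input and
# also used as the training target, so the label branch can learn to copy
# them through and the reported accuracy says little about what the CNN
# learned from the images. A minimal image-only sketch (an assumed
# alternative, not part of the original code) drops the label branch:
image_only_output = Dense(max_label_length, activation='sigmoid')(image_features)
image_only_model = Model(inputs=image_input, outputs=image_only_output)
image_only_model.compile(optimizer='adam', loss='binary_crossentropy',
                         metrics=['binary_accuracy'])
# If the label values are box coordinates rather than 0/1 flags, a linear
# activation with an 'mse' loss would be the more natural fit.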
# Step 5: Train the model
history = model.fit(
    [x_train, y_train], y_train,
    epochs=20,
    batch_size=20,
    validation_data=([x_val, y_val], y_val)
)
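# Optional: plot the loss curves to see whether training and validation loss
# diverge (overfitting) or both plateau (underfitting or unsuitable targets).
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.legend()
plt.show()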
# Step 6: Evaluate the model
loss, accuracy, binary_accuracy = model.evaluate([x_val, y_val], y_val)
print(f"Validation accuracy: {accuracy * 100:.2f}%")
print(f"Validation binary accuracy: {binary_accuracy * 100:.2f}%")
# Step 7: Visualize predictions with annotations
def visualize_predictions(images, labels, predictions, file_names, target_size):
    num_images = len(images)
    for i in range(num_images):
        fig, ax = plt.subplots(figsize=(images[i].shape[1] / 100, images[i].shape[0] / 100))  # Use image dimensions for figsize
        ax.imshow(images[i])
        ax.set_title(f"Image: {file_names[i]}")
        # Draw rectangles based on predictions
        for j, pred in enumerate(predictions[i]):
            if pred > 0.5:  # Adjust threshold as needed
                # Calculate box dimensions based on label position and target_size
                box_x = target_size[1] * (j + 1) / (max_label_length + 1)
                box_y = target_size[0] * 0.05
                box_width = target_size[1] * 0.2
                box_height = target_size[0] * 0.1
                rect = patches.Rectangle((box_x, box_y), box_width, box_height, linewidth=1, edgecolor='r', facecolor='none')
                ax.add_patch(rect)
        ax.axis('off')
        plt.show()
# Predict on validation data
predictions = model.predict([x_val, y_val])
# Convert predictions to binary format for visualization
predictions_binary = (predictions > 0.5).astype(int)
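# Optional: inspect a few raw sigmoid outputs to judge whether the 0.5
# threshold used above is reasonable for this data.
print(predictions[0].ravel()[:10])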
# Display some predictions with annotations (use idx_val so the file names
# match the shuffled validation split)
num_visualize = min(5, len(x_val))  # Visualize up to 5 images, adjust as needed
val_file_names = [image_files[k] for k in idx_val[:num_visualize]]
visualize_predictions(x_val[:num_visualize], y_val[:num_visualize], predictions_binary[:num_visualize], val_file_names, target_size)
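# Optionally persist the trained model for later inference; the file name
# here is just a placeholder, not from the original code.
model.save('road_distress_model.h5')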