webCamDigReader/webCamDigReader.py at main · fzappa/webCamDigReader · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
import cv2
import numpy as np
import pytesseract
import csv
import re
from datetime import datetime
import os
import sys
import json
import shutil
import time
import argparse
import textwrap
import logging

# --- Module-level configuration -------------------------------------------
# Index of the video device handed to cv2.VideoCapture. Replaced by the
# --camera argument when the file is run as a script.
camera = 1
# Seconds between two OCR captures. Replaced by the --delay argument when
# the file is run as a script.
delay_time_sec = 10
# JSON file that save_coordinates() writes and load_coordinates() reads.
coordinates_file = "rectangles_coordinates.json"
# CSV file that capture_data() appends one row to per capture.
output_file = "data_captured.csv"

# --- Shared mutable state used by the mouse callback and main() -----------
# Finished regions; each entry is a list of (x, y) points of one drawn shape.
rectangles = []
# Points of the shape currently being drawn with the mouse.
current_rectangle = []
# True while the left mouse button is held down (a shape is being drawn).
cropping = False
# Last mouse position reported to click_and_crop().
mouse_pos = (0, 0)

# Find the path to the Tesseract executable (None when it is not on PATH)
tesseract_path = shutil.which("tesseract")

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Verify that Tesseract was located and hand its path to pytesseract
def check_tesseract_path():
    """Configure pytesseract with the Tesseract binary found on PATH.

    Uses the module-level ``tesseract_path`` (the result of
    ``shutil.which("tesseract")``).

    Raises:
        Exception: if no Tesseract executable was found.
    """
    if not tesseract_path:
        logger.error(f"Could not find Tesseract executable at {pytesseract.pytesseract.tesseract_cmd}. Please check the path and try again.")
        raise Exception("Tesseract executable not found")

    pytesseract.pytesseract.tesseract_cmd = tesseract_path

# Mouse click event for drawing freely
def click_and_crop(event, x, y, flags, param):
    """OpenCV mouse callback that records free-hand shapes.

    Pressing the left button starts a new shape, moving the mouse while the
    button is held adds points to it, and releasing the button stores the
    finished shape in the module-level ``rectangles`` list.

    Args:
        event: OpenCV mouse event code (``cv2.EVENT_*``).
        x: Mouse x coordinate reported with the event.
        y: Mouse y coordinate reported with the event.
        flags: Unused; required by the OpenCV callback signature.
        param: Unused; required by the OpenCV callback signature.
    """
    global rectangles, current_rectangle, cropping, mouse_pos

    mouse_pos = (x, y)

    if event == cv2.EVENT_LBUTTONDOWN:
        # Start a fresh shape at the click position.
        cropping = True
        current_rectangle = [(x, y)]

    elif event == cv2.EVENT_MOUSEMOVE:
        # If drawing, keep adding points to the current shape
        if cropping:
            current_rectangle.append((x, y))

    elif event == cv2.EVENT_LBUTTONUP:
        # A release without a matching press (e.g. the button went down
        # outside the window) must not store anything: current_rectangle is
        # then either empty or the shape that was already stored.
        if not cropping:
            return

        # Stop drawing
        cropping = False

        # A polygon needs at least three vertices; a plain click or a
        # two-point stroke would otherwise be counted as a region.
        if len(current_rectangle) >= 3:
            rectangles.append(current_rectangle)


def adjust_gamma(image, gamma=1.0):
    """Apply gamma correction to an 8-bit image via a 256-entry lookup table.

    Each intensity ``i`` is mapped to ``((i / 255) ** (1 / gamma)) * 255``,
    truncated to ``uint8``.

    Args:
        image: Image passed to ``cv2.LUT``.
        gamma: Gamma value; must be greater than zero. ``1.0`` gives the
            table ``((i / 255) ** 1) * 255``.

    Returns:
        The result of ``cv2.LUT(image, table)``.

    Raises:
        ValueError: if ``gamma`` is zero, negative or NaN.
    """
    # `not gamma > 0` (rather than `gamma <= 0`) also rejects NaN.
    if not gamma > 0:
        raise ValueError(f"gamma must be a positive number, got {gamma!r}")

    inv_gamma = 1.0 / gamma
    table = np.array([((i / 255.0) ** inv_gamma) * 255
                      for i in np.arange(0, 256)]).astype("uint8")
    return cv2.LUT(image, table)


# Process the image and extract data from the region of interest
def process_image(frame, points, debug=False, roi_id=0):
    """Run OCR on the polygonal region ``points`` of ``frame``.

    Pipeline: mask everything outside the polygon, crop to the polygon's
    bounding box, upscale 2x, convert to grayscale, blur, equalize the
    histogram, gamma-correct, Otsu-threshold, then pass the binary image to
    Tesseract.

    The gamma value is read from the module-level ``gamma_value``, which is
    set by the script entry point.

    Args:
        frame: Image with a channel axis (``frame.shape[2]`` is read);
            converted with ``cv2.COLOR_BGR2GRAY``.
        points: Sequence of (x, y) vertices of the region.
        debug: When true, intermediate images are written to the current
            working directory as ``roi_<roi_id>_*.png``.
        roi_id: Identifier used in debug file names and log messages.

    Returns:
        The text returned by ``pytesseract.image_to_string``, or an empty
        string when the region lies completely outside the frame.
    """
    # Build the polygon once and reuse it for both the mask and the box.
    roi_corners = np.array([points], dtype=np.int32)

    # Black out every pixel outside the polygon.
    mask = np.zeros(frame.shape, dtype=np.uint8)
    channel_count = frame.shape[2]
    ignore_mask_color = (255,) * channel_count
    cv2.fillPoly(mask, roi_corners, ignore_mask_color)
    masked_image = cv2.bitwise_and(frame, mask)

    # Find the bounding rectangle and clamp it to the frame. Coordinates can
    # fall outside the image (mouse dragged past the window edge, or a
    # coordinates file made for another resolution); unclamped, negative or
    # oversized indices produce an empty slice that crashes cv2.resize.
    x, y, w, h = cv2.boundingRect(roi_corners)
    frame_height, frame_width = frame.shape[:2]
    left, top = max(x, 0), max(y, 0)
    right, bottom = min(x + w, frame_width), min(y + h, frame_height)
    if left >= right or top >= bottom:
        logger.warning("ROI %s lies outside the frame; skipping OCR", roi_id)
        return ""
    roi = masked_image[top:bottom, left:right]

    # Save original ROI image if debug is True
    if debug:
        cv2.imwrite(f'roi_{roi_id}_original.png', roi)

    # Resize the image
    roi = cv2.resize(roi, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

    # Save resized image if debug is True
    if debug:
        cv2.imwrite(f'roi_{roi_id}_resized.png', roi)

    # Convert to grayscale
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

    # Save grayscale image if debug is True
    if debug:
        cv2.imwrite(f'roi_{roi_id}_gray.png', gray)

    # Apply Gaussian blur to remove noise
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    # Equalize
    gray = cv2.equalizeHist(gray)

    # Adjust brightness
    gray = adjust_gamma(gray, gamma_value)

    # Save the blurred, equalized and gamma-corrected image if debug is True
    if debug:
        cv2.imwrite(f'roi_{roi_id}_blurred.png', gray)

    # Threshold the image to get a binary image
    _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Save threshold image if debug is True
    if debug:
        # str(datetime.now()) contains ':' and spaces, which are not valid in
        # Windows file names; use a filesystem-safe timestamp instead.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        cv2.imwrite(f'roi_{roi_id}_{stamp}_threshold.png', threshold)

    # Set Tesseract config: restrict output to alphanumerics, '.' and ',',
    # and use page segmentation mode 6.
    custom_config = r'-c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ., --psm 6'
    data = pytesseract.image_to_string(threshold, config=custom_config)

    return data


# Persist the drawn regions so they can be reused on the next run
def save_coordinates():
    """Serialize the module-level ``rectangles`` list to ``coordinates_file`` as JSON."""
    with open(coordinates_file, "w") as handle:
        handle.write(json.dumps(rectangles))

# Restore previously saved regions
def load_coordinates():
    """Replace the module-level ``rectangles`` with the JSON content of ``coordinates_file``."""
    global rectangles
    with open(coordinates_file, "r") as handle:
        raw_text = handle.read()
        rectangles = json.loads(raw_text)


def convert_frequency(data):
    """Rewrite frequency readings in ``data`` as plain Hz numbers.

    Three passes are applied in order: ``<number> MHz`` becomes the number
    times 1e6, ``<number> kHz`` becomes the number times 1e3 (both rendered
    with ``str(float)``), and finally ``<number> Hz`` loses its unit suffix.
    Whitespace between number and unit is optional.

    Args:
        data: Text to convert.

    Returns:
        The converted text.
    """
    # (pattern, multiplier) pairs, applied one after another so that the
    # output of an earlier pass is visible to the later ones.
    scaled_units = (
        (r'(\d+(\.\d+)?)(\s*)MHz', 1e6),
        (r'(\d+(\.\d+)?)(\s*)kHz', 1e3),
    )

    converted = data
    for pattern, multiplier in scaled_units:
        converted = re.sub(
            pattern,
            lambda found, k=multiplier: str(float(found.group(1)) * k),
            converted,
        )

    # Plain Hz: keep the number exactly as written, drop the unit.
    return re.sub(r'(\d+(\.\d+)?)(\s*)Hz', r'\1', converted)


# Capture data from the camera and process the frames
def capture_data():
    """Show the live camera feed and periodically OCR every stored region.

    Every ``delay_time_sec`` seconds each shape in ``rectangles`` is passed
    to ``process_image``, the text is run through ``convert_frequency``, and
    one row (timestamp followed by one value per region) is appended to
    ``output_file``. The loop ends when a frame cannot be read or when 'q'
    is pressed in the preview window.

    Reads the module-level ``camera``, ``delay_time_sec``, ``rectangles``,
    ``output_file`` and ``args`` (for ``args.debug``).
    """
    cap = cv2.VideoCapture(camera)
    try:
        if not cap.isOpened():
            logger.error('Unable to open the camera')
            # NOTE(review): the pause before returning is kept from the
            # original code; presumably it leaves time to read the message.
            time.sleep(delay_time_sec)
            return

        # Monotonic clock: the interval must not be affected by wall-clock
        # adjustments (NTP sync, DST, manual changes).
        last_capture = time.monotonic()

        while True:
            ret, frame = cap.read()
            if not ret:
                logger.error('Unable to read frame')
                break

            # Draw on a copy so the overlay never ends up in the OCR input.
            display_frame = frame.copy()

            # Draw the rectangles on every frame
            for rect in rectangles:
                cv2.polylines(display_frame, [np.array(rect)], True, (0, 255, 0), 2)

            # Check if it's time to capture data
            if time.monotonic() - last_capture >= delay_time_sec:
                last_capture = time.monotonic()
                data_list = []
                for count, rect in enumerate(rectangles, 1):
                    data = process_image(frame, rect, args.debug, roi_id=count)
                    # Flatten multi-line OCR output into a single CSV cell.
                    data = data.replace('\n', ' ').replace('\r', '')

                    # Convert frequencies to Hz
                    data = convert_frequency(data)

                    logger.info('Data from ROI %s: %s', count, data)
                    data_list.append(data)

                with open(output_file, 'a', newline='') as csvfile:
                    writer = csv.writer(csvfile)
                    writer.writerow([datetime.now()] + data_list)

            # Display the frame with the rectangle
            cv2.imshow("image", display_frame)
            key = cv2.waitKey(1) & 0xFF

            # Quit the application if 'q' is pressed
            if key == ord("q"):
                break
    finally:
        # Always give the camera and the window back, including when OCR or
        # the CSV write raises.
        cap.release()
        cv2.destroyAllWindows()


# Main function
def main():
    """Obtain ``num_regions`` regions, then start the capture loop.

    Regions are loaded from ``coordinates_file`` when that file exists. If
    the number of loaded regions differs from ``num_regions``, a selection
    window is opened in which the user draws the regions with the mouse:

    - 'r' discards all drawn regions,
    - 's' saves the regions drawn so far and leaves the drawing loop (if
      fewer than ``num_regions`` were drawn, selection restarts from
      scratch),
    - 'q' quits the application.

    The regions are also saved automatically as soon as ``num_regions``
    shapes have been drawn. Afterwards ``capture_data`` is called.

    Raises:
        SystemExit: when 'q' is pressed (exit code 0) or when the camera
            cannot be opened (exit code 1).
    """
    global rectangles

    if os.path.exists(coordinates_file):
        load_coordinates()

    while len(rectangles) != num_regions:
        rectangles = []
        cv2.namedWindow("image")
        cv2.setMouseCallback("image", click_and_crop)

        cap = cv2.VideoCapture(camera)
        try:
            # Without this check an unavailable camera makes every read fail,
            # and the outer loop would spin forever logging errors.
            if not cap.isOpened():
                logger.error('Unable to open the camera')
                sys.exit(1)

            while len(rectangles) != num_regions:
                ret, frame = cap.read()
                if not ret:
                    logger.error('Unable to read frame')
                    break

                # Display the drawn shapes on the frame
                for shape in rectangles:
                    cv2.polylines(frame, [np.array(shape)], True, (0, 255, 0), 2)
                if cropping and len(current_rectangle) > 1:
                    cv2.polylines(frame, [np.array(current_rectangle)], True, (0, 255, 0), 2)

                cv2.imshow("image", frame)
                # The mouse callback runs while waitKey pumps GUI events, so
                # `rectangles` may have grown by the time it returns.
                key = cv2.waitKey(1) & 0xFF

                # Reset all shapes if 'r' is pressed
                if key == ord("r"):
                    rectangles = []

                # Save and break from the loop if 's' is pressed or if the correct number of regions are selected
                elif key == ord("s") or len(rectangles) == num_regions:
                    save_coordinates()
                    break

                # Quit the application if 'q' is pressed
                elif key == ord("q"):
                    sys.exit()
        finally:
            # Runs on normal completion, on sys.exit() and on errors alike.
            cap.release()
            cv2.destroyAllWindows()

    capture_data()


if __name__ == "__main__":
    # Script entry point: parse the command line, publish the settings as
    # module-level globals read by the functions above, then run main().
    description = '''
    WebCamDigReader captures and recognizes
    numerical data from designated screen regions,
    and subsequently saves the results in a CSV file.
    You can define these regions, as well as various
    other parameters, to customize your experience.

    On the window selection screen,
    you can use the following commands:

    - 'r': Reset all drawn regions
    - 's': Save your defined regions
    - 'q': Quit the application
    '''

    parser = argparse.ArgumentParser(
        description=textwrap.dedent(description),
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument("-r", "--regions", type=int, default=1,
                    help="Specify the number of regions to select. Defaults to 1.")

    parser.add_argument("-g", "--gamma", type=float, default=1.0,
                    help="Adjust the gamma of the image for better brightness. Defaults to 1")

    parser.add_argument("-c", "--camera", type=int, default=0,
                        help="Select the camera source. Defaults to 0.")

    parser.add_argument("-d", "--delay", type=int, default=10,
                        help="Set the delay between frames in seconds. Defaults to 10.")

    parser.add_argument("-f", "--file", type=str, default="rectangles_coordinates.json",
                        help="Provide the JSON file with coordinates. Defaults to \"rectangles_coordinates.json\" ")

    parser.add_argument("-o", "--out", type=str, default="data_captured.csv",
                        help="Specify the output file for the CSV data. Defaults to \"data_captured.csv\".")

    parser.add_argument("-debug", "--debug", action='store_true',
                    help="Enable debug mode. If set, intermediate images will be saved during preprocessing.")


    args = parser.parse_args()

    # Show help when no parameters are provided
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Reject values that would break later stages: a region count below one
    # can never be satisfied by the selection loop in a useful way, and a
    # non-positive gamma makes the gamma lookup table undefined.
    if args.regions < 1:
        parser.error("--regions must be at least 1")
    if not args.gamma > 0:
        parser.error("--gamma must be greater than 0")

    num_regions = args.regions
    gamma_value = args.gamma
    camera = args.camera
    delay_time_sec = args.delay
    # --file and --out were parsed but never applied, so the defaults were
    # always used regardless of what the user passed.
    coordinates_file = args.file
    output_file = args.out
    check_tesseract_path()
    main()