# webCamDigReader.py
import cv2
import numpy as np
import pytesseract
import csv
import re
from datetime import datetime
import os
import sys
import json
import shutil
import time
import argparse
import textwrap
import logging
# Index of the camera device handed to cv2.VideoCapture; reassigned from
# --camera when the file is run as a script.
camera = 1
# Seconds between two OCR captures; reassigned from --delay when run as a script.
delay_time_sec = 10
# JSON file in which the selected region coordinates are stored.
coordinates_file = "rectangles_coordinates.json"
# CSV file to which one row of recognized text is appended per capture.
output_file = "data_captured.csv"
# Saved regions; each entry is the list of (x, y) points recorded by the
# mouse callback for one region.
rectangles = []
# Points of the region that is currently being drawn.
current_rectangle = []
# True while the left mouse button is held down.
cropping = False
# Most recent mouse position reported to the mouse callback.
mouse_pos = (0, 0)
# Locate the Tesseract executable on PATH (None when it cannot be found)
tesseract_path = shutil.which("tesseract")
# Configure root logging at INFO level and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def check_tesseract_path():
    """Point pytesseract at the Tesseract binary that was found on PATH.

    Raises:
        Exception: if no Tesseract executable was located.
    """
    # Guard clause: nothing to configure when the lookup came back empty.
    if not tesseract_path:
        logger.error(f"Could not find Tesseract executable at {pytesseract.pytesseract.tesseract_cmd}. Please check the path and try again.")
        raise Exception("Tesseract executable not found")

    pytesseract.pytesseract.tesseract_cmd = tesseract_path
def click_and_crop(event, x, y, flags, param):
    """Mouse callback that records a free-hand region outline.

    Pressing the left button starts a new outline, moving the mouse while
    the button is held appends points to it, and releasing the button
    stores the finished outline in ``rectangles``.

    Args:
        event: OpenCV mouse event code.
        x: Horizontal mouse position.
        y: Vertical mouse position.
        flags: Unused; required by the OpenCV callback signature.
        param: Unused; required by the OpenCV callback signature.
    """
    global rectangles, current_rectangle, cropping, mouse_pos
    mouse_pos = (x, y)

    if event == cv2.EVENT_LBUTTONDOWN:
        cropping = True
        current_rectangle = [(x, y)]
    elif event == cv2.EVENT_MOUSEMOVE:
        # Only collect points while the button is held down.
        if cropping:
            current_rectangle.append((x, y))
    elif event == cv2.EVENT_LBUTTONUP:
        # A release without a matching press (e.g. the button went down
        # outside the window) would otherwise store a stale or empty outline.
        if not cropping:
            return
        cropping = False
        # An outline needs at least three vertices to enclose any area; a
        # plain click must not count as a selected region.
        if len(current_rectangle) >= 3:
            # Store a copy so the saved region is independent of the
            # working list.
            rectangles.append(list(current_rectangle))
def adjust_gamma(image, gamma=1.0):
    """Apply gamma correction to an image through a 256-entry lookup table.

    Each input level ``i`` is mapped to ``((i / 255) ** (1 / gamma)) * 255``
    and truncated to ``uint8``.

    Args:
        image: Image passed to ``cv2.LUT``.
        gamma: Gamma value; must be greater than zero.

    Returns:
        The result of ``cv2.LUT(image, table)``.

    Raises:
        ValueError: if ``gamma`` is not greater than zero.
    """
    # gamma == 0 would divide by zero and a negative gamma turns level 0
    # into infinity, so reject both up front (``not >`` also rejects NaN).
    if not gamma > 0:
        raise ValueError(f"gamma must be greater than 0, got {gamma!r}")

    inv_gamma = 1.0 / gamma
    table = np.array([((i / 255.0) ** inv_gamma) * 255
                      for i in np.arange(0, 256)]).astype("uint8")
    return cv2.LUT(image, table)
def process_image(frame, points, debug=False, roi_id=0, gamma=None):
    """Run OCR on the polygonal region ``points`` of ``frame``.

    The polygon is masked out of the frame, cropped to its bounding box,
    upscaled 2x, converted to grayscale, blurred, histogram-equalized,
    gamma-corrected and Otsu-thresholded before being handed to Tesseract.

    Args:
        frame: Image array with a channel axis (``frame.shape[2]`` is read).
            It is converted with ``COLOR_BGR2GRAY``.
        points: Sequence of (x, y) vertices outlining the region.
        debug: When true, every intermediate image is written to the current
            directory as ``roi_<roi_id>_<stage>.png``.
        roi_id: Identifier used in the debug file names.
        gamma: Gamma passed to ``adjust_gamma``. ``None`` (the default) falls
            back to the module-level ``gamma_value`` when it exists, else 1.0.

    Returns:
        The text recognized by Tesseract, or an empty string when the region
        lies completely outside the frame.
    """
    if gamma is None:
        # ``gamma_value`` is only defined when the file runs as a script.
        gamma = globals().get("gamma_value", 1.0)

    def dump(stage, image):
        """Write one intermediate image when debug mode is on."""
        if debug:
            cv2.imwrite(f'roi_{roi_id}_{stage}.png', image)

    # Keep only the pixels inside the polygon.
    polygon = np.array([points], dtype=np.int32)
    mask = np.zeros(frame.shape, dtype=np.uint8)
    channel_count = frame.shape[2]
    cv2.fillPoly(mask, polygon, (255,) * channel_count)
    masked_image = cv2.bitwise_and(frame, mask)

    # Crop to the bounding rectangle, clamped to the frame: a negative start
    # index would otherwise be interpreted as "from the end" by NumPy.
    x, y, w, h = cv2.boundingRect(polygon)
    frame_height, frame_width = frame.shape[:2]
    left, top = max(x, 0), max(y, 0)
    right, bottom = min(x + w, frame_width), min(y + h, frame_height)
    if right <= left or bottom <= top:
        logger.warning("ROI %s lies outside the frame; skipping OCR", roi_id)
        return ""
    roi = masked_image[top:bottom, left:right]
    dump("original", roi)

    # Upscale before OCR.
    roi = cv2.resize(roi, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    dump("resized", roi)

    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    dump("gray", gray)

    # Denoise, equalize the histogram, then adjust brightness.
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    gray = cv2.equalizeHist(gray)
    gray = adjust_gamma(gray, gamma)
    dump("blurred", gray)

    # Otsu picks the binarization threshold automatically.
    _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    if debug:
        # str(datetime.now()) contains spaces and colons, which are not valid
        # in Windows file names, so use a compact timestamp instead.
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        dump(f'{stamp}_threshold', threshold)

    # Restrict Tesseract to alphanumerics, '.' and ',' and treat the region
    # as a single uniform block of text (--psm 6).
    custom_config = r'-c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ., --psm 6'
    return pytesseract.image_to_string(threshold, config=custom_config)
def save_coordinates():
    """Write the current ``rectangles`` list as JSON to ``coordinates_file``."""
    with open(coordinates_file, "w") as handle:
        json.dump(rectangles, handle)
def load_coordinates():
    """Load the region list from ``coordinates_file`` into ``rectangles``.

    If the file does not contain valid JSON, or the JSON value is not a
    list, a warning is logged and ``rectangles`` is reset to an empty list
    instead of raising.
    """
    global rectangles
    try:
        with open(coordinates_file, "r") as file:
            loaded = json.load(file)
    except json.JSONDecodeError as err:
        # E.g. an empty file left behind by an interrupted save.
        logger.warning("Ignoring unreadable coordinates file %s: %s",
                       coordinates_file, err)
        rectangles = []
        return

    if not isinstance(loaded, list):
        logger.warning("Ignoring coordinates file %s: expected a JSON list",
                       coordinates_file)
        loaded = []
    rectangles = loaded
def convert_frequency(data):
    """Rewrite frequency readings inside ``data`` as plain numbers in Hz.

    ``<number> GHz``, ``<number> MHz`` and ``<number> kHz`` are replaced by
    the value in hertz formatted as a float (``"1.5 MHz"`` becomes
    ``"1500000.0"``). For ``<number> Hz`` only the unit is removed. Unit
    matching is case-sensitive and all other text is left untouched.

    Args:
        data: Text to scan.

    Returns:
        The text with the recognized frequency readings converted.
    """
    # Local import keeps this function self-contained.
    from decimal import Decimal

    number = r'(\d+(?:\.\d+)?)\s*'

    def scale_by(factor):
        """Build a substitution callback that multiplies the match by factor."""
        def replace(match):
            # Multiply in decimal so "8.2 MHz" gives 8200000.0 rather than the
            # binary-float artefact 8199999.999999999; the final float()
            # keeps the output format of plain float formatting.
            return str(float(Decimal(match.group(1)) * factor))
        return replace

    # Prefixed units first, so the bare "Hz" rule below only sees readings
    # that were already in hertz.
    for unit, factor in (("GHz", 10 ** 9), ("MHz", 10 ** 6), ("kHz", 10 ** 3)):
        data = re.sub(number + unit, scale_by(factor), data)

    return re.sub(number + "Hz", lambda match: match.group(1), data)
def capture_data():
    """Show the live camera feed and periodically OCR every saved region.

    Every ``delay_time_sec`` seconds each region in ``rectangles`` is passed
    to ``process_image``; the recognized text is run through
    ``convert_frequency``, logged, and appended together with a timestamp as
    one row to ``output_file``. The loop ends when a frame cannot be read or
    when 'q' is pressed. The camera and the display window are released even
    if an exception interrupts the loop.

    If the camera cannot be opened, an error is logged and the function
    returns after sleeping ``delay_time_sec`` seconds.
    """
    cap = cv2.VideoCapture(camera)
    if not cap.isOpened():
        logger.error('Unable to open the camera')
        cap.release()
        time.sleep(delay_time_sec)
        return

    # ``args`` only exists when the file runs as a script; default to no
    # debug output otherwise.
    debug = getattr(globals().get("args"), "debug", False)

    try:
        last_capture = time.time()
        while True:
            ret, frame = cap.read()
            if not ret:
                logger.error('Unable to read frame')
                break

            # Draw on a copy so the overlays never end up in the OCR input.
            display_frame = frame.copy()
            for rect in rectangles:
                cv2.polylines(display_frame, [np.array(rect, dtype=np.int32)],
                              True, (0, 255, 0), 2)

            # Check if it's time to capture data
            if time.time() - last_capture >= delay_time_sec:
                last_capture = time.time()
                data_list = []
                for count, rect in enumerate(rectangles, 1):
                    data = process_image(frame, rect, debug, roi_id=count)
                    # Flatten multi-line OCR output into a single CSV cell.
                    data = data.replace('\n', ' ').replace('\r', '')
                    data = convert_frequency(data)
                    logger.info('Data from ROI %s: %s', count, data)
                    data_list.append(data)

                with open(output_file, 'a', newline='') as csvfile:
                    csv.writer(csvfile).writerow([datetime.now()] + data_list)

            cv2.imshow("image", display_frame)
            key = cv2.waitKey(1) & 0xFF

            # Quit the application if 'q' is pressed
            if key == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
def main():
    """Make sure ``num_regions`` regions are defined, then start capturing.

    Regions are loaded from ``coordinates_file`` when it exists. While the
    number of regions differs from ``num_regions`` an interactive selection
    runs on the live camera image:

    - 'r' discards all regions drawn so far,
    - 's' saves the current regions; the coordinates are also saved
      automatically once ``num_regions`` regions have been drawn,
    - 'q' quits the application.

    Saving with a region count other than ``num_regions`` restarts the
    selection from scratch. If the camera cannot be opened or a frame cannot
    be read, an error is logged and the function returns without starting
    the capture. The camera and window are released on every exit path.
    """
    global rectangles

    if os.path.exists(coordinates_file):
        load_coordinates()

    while len(rectangles) != num_regions:
        rectangles = []

        cap = cv2.VideoCapture(camera)
        if not cap.isOpened():
            logger.error('Unable to open the camera')
            cap.release()
            return

        cv2.namedWindow("image")
        cv2.setMouseCallback("image", click_and_crop)
        try:
            while len(rectangles) != num_regions:
                ret, frame = cap.read()
                if not ret:
                    # Give up instead of re-opening the camera in a tight
                    # endless loop.
                    logger.error('Unable to read frame')
                    return

                # Display the finished shapes on the frame
                for shape in rectangles:
                    cv2.polylines(frame, [np.array(shape, dtype=np.int32)],
                                  True, (0, 255, 0), 2)
                # ...and the outline that is currently being drawn
                if cropping and len(current_rectangle) > 1:
                    cv2.polylines(frame,
                                  [np.array(current_rectangle, dtype=np.int32)],
                                  True, (0, 255, 0), 2)

                cv2.imshow("image", frame)
                key = cv2.waitKey(1) & 0xFF

                # Reset all shapes if 'r' is pressed
                if key == ord("r"):
                    rectangles = []
                # Save and leave the loop if 's' is pressed or if the
                # correct number of regions are selected
                elif key == ord("s") or len(rectangles) == num_regions:
                    save_coordinates()
                    break
                # Quit the application if 'q' is pressed; the finally clause
                # below releases the camera and closes the window first.
                elif key == ord("q"):
                    sys.exit()
        finally:
            cap.release()
            cv2.destroyAllWindows()

    capture_data()
# Script entry point: parse the command line, publish the chosen settings as
# module-level globals read by the functions above, then start the application.
if __name__ == "__main__":
    description = '''
    WebCamDigReader captures and recognizes
    numerical data from designated screen regions,
    and subsequently saves the results in a CSV file.
    You can define these regions, as well as various
    other parameters, to customize your experience.
    On the window selection screen,
    you can use the following commands:
    - 'r': Reset all drawn regions
    - 's': Save your defined regions
    - 'q': Quit the application
    '''
    parser = argparse.ArgumentParser(
        description=textwrap.dedent(description),
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("-r", "--regions", type=int, default=1,
                        help="Specify the number of regions to select. Defaults to 1.")
    parser.add_argument("-g", "--gamma", type=float, default=1.0,
                        help="Adjust the gamma of the image for better brightness. Defaults to 1")
    parser.add_argument("-c", "--camera", type=int, default=0,
                        help="Select the camera source. Defaults to 0.")
    parser.add_argument("-d", "--delay", type=int, default=10,
                        help="Set the delay between frames in seconds. Defaults to 10.")
    parser.add_argument("-f", "--file", type=str, default="rectangles_coordinates.json",
                        help="Provide the JSON file with coordinates. Defaults to \"rectangles_coordinates.json\" ")
    parser.add_argument("-o", "--out", type=str, default="data_captured.csv",
                        help="Specify the output file for the CSV data. Defaults to \"data_captured.csv\".")
    parser.add_argument("-debug", "--debug", action='store_true',
                        help="Enable debug mode. If set, intermediate images will be saved during preprocessing.")
    args = parser.parse_args()

    # Show help when no parameters are provided
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Gamma is used as a divisor when the lookup table is built.
    if not args.gamma > 0:
        parser.error("--gamma must be greater than 0")

    num_regions = args.regions
    gamma_value = args.gamma
    camera = args.camera
    delay_time_sec = args.delay
    # --file and --out were parsed but never applied, so the defaults were
    # always used; publish them like the other settings.
    coordinates_file = args.file
    output_file = args.out

    check_tesseract_path()
    main()