-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimage_preprocessing.py
More file actions
109 lines (95 loc) · 4.4 KB
/
image_preprocessing.py
File metadata and controls
109 lines (95 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
Responsible for image preprocessing. Image preprocessing is needed to make the image clearer, crisper and easier to read.
OCR performs better if the image is:
- grayscale
- denoised
- binarised
grayscale conversion: Converts from RGB space to grayscale space, turning a color image into a black-and-white image. Each pixel represents intensity rather than color. Reduces complexity from 3 color channels to 1.
denoised: Applies filters and removes dots, specks and blurs. Currently Pillow's MedianFilter (ImageFilter.MedianFilter) is used.
binarised: Converts the grayscale image to binary and removes background clutter. Each pixel becomes either black (0) or white (255): dark pixels become darker and light pixels become lighter.
TODO:
- DPI Normalization: to make the image crisper and easier to read
- Contour detection
- Border Removal
- Erosion and Dilation
- Edge detection
- Rotation and Alignment
Currently we are using Pillow, which is basic. We can move to opencv which has better denoising and binarization support. Also it supports contour detection and DPI normalization.
"""
import os
import logging
from PIL import Image, ImageFilter
import cv2 as cv
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def preprocess_image(file_path: str) -> str:
    """
    Preprocess an image with Pillow: grayscale, median-filter denoise, binarise.

    The processed image is written next to the input as
    "<base>-processed.<ext>", where <base> and <ext> come from splitting
    ``file_path`` on its extension.

    This is a best-effort step: if anything fails (unreadable file, unknown
    extension on save, ...), the error is logged with its traceback and the
    original ``file_path`` is returned so the caller can continue with the
    unprocessed image.

    Args:
        file_path: Path of the image to preprocess.

    Returns:
        Path of the processed image on success, otherwise ``file_path``.
    """
    try:
        base, ext = os.path.splitext(file_path)
        ext = ext.lstrip(".")
        # Use a context manager so the underlying file handle is released
        # even for multi-frame formats or when an error occurs mid-way.
        # convert() loads the pixel data and returns an independent image,
        # so it remains usable after the source image is closed.
        with Image.open(file_path) as source_image:
            image_grayscale = source_image.convert("L")
        image_denoised = image_grayscale.filter(ImageFilter.MedianFilter(size=3))
        # Fixed global threshold at 128: darker pixels -> 0, the rest -> 255.
        image_binary = image_denoised.point(lambda x: 0 if x < 128 else 255, mode='1')
        output_path = f"{base}-processed.{ext}"
        image_binary.save(output_path)
        return output_path
    except Exception as exc:
        # Deliberate catch-all: preprocessing must never block the pipeline.
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception("Exception %s ocurred during image preprocessing.", exc)
        return file_path
def preprocess_image_opencv(file_path: str, options: dict = None, source: str = "image"):
    """
    Preprocess an image with OpenCV and write the result next to the input.

    source could be one of 'image' or 'pdf'. fastNlMeansDenoising is applied
    when source is 'pdf' (PDF pages converted to images), while
    bilateralFilter is applied for any other source (camera images).

    Currently performs (each step can be switched off through ``options``):
    - "gray": Color space conversion from BGR to Grayscale, to make the image easier to read
    - "denoise": Denoising, Smoothing and Blurring to remove specks/grains, using
      fastNlMeansDenoising or bilateralFilter
    - "binarize": Binarisation, to have black text on a white background

    Denoising and binarisation are done in grayscale, so grayscale conversion
    is forced whenever either of them is enabled. A step is enabled only when
    its option is exactly ``True``.

    TODO:
    - Cropping the area of interest
    - Rotation and Alignment: Using Canny, HoughLines.

    Usage:
    preprocess_image_opencv("/media/ocr-files/ocr-pan.jpeg", options={"denoise": False, "binarize": False})

    Args:
        file_path: Path of the image to preprocess.
        options: Optional overrides for the "gray", "denoise" and "binarize"
            switches; missing keys default to True. The dict is not modified.
        source: 'pdf' selects fastNlMeansDenoising; anything else selects
            bilateralFilter.

    Returns:
        Path of the written image: "<base>-cv-processed.<ext>".

    Raises:
        cv.error: If the file cannot be read as an image, or if an OpenCV
            call fails.
    """
    settings = {
        "gray": True,
        "denoise": True,
        "binarize": True,
    }
    settings.update(options or {})
    logger.info("file_path: %s, options: %s", file_path, settings)

    base, ext = os.path.splitext(file_path)
    ext = ext.lstrip(".")

    img = cv.imread(file_path)
    if img is None:
        # imread signals failure by returning None instead of raising; fail
        # here with a clear message rather than with a cryptic assertion
        # inside a later OpenCV call. Same exception type as before.
        raise cv.error(f"Could not read image from {file_path!r}")

    want_gray = settings["gray"] is True
    want_denoise = settings["denoise"] is True
    want_binarize = settings["binarize"] is True

    if not want_gray:
        if want_denoise:
            # Force grayscale, as denoising is done in grayscale
            logger.info("Forcing grayscale, as denoising is done in grayscale")
            want_gray = True
        elif want_binarize:
            # Force grayscale, as binarizing is done in grayscale
            logger.info("Forcing grayscale, as binarizing is done in grayscale")
            want_gray = True

    if want_gray:
        img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    if want_denoise:
        if source == "pdf":
            logger.info("Applying fastNlMeansDenoising")
            img = cv.fastNlMeansDenoising(img, h=30)
        else:
            logger.info("Applying bilateralFilter")
            # It smoothes the image without losing edges
            img = cv.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)

    if want_binarize:
        img = cv.adaptiveThreshold(
            img,
            maxValue=255,
            adaptiveMethod=cv.ADAPTIVE_THRESH_GAUSSIAN_C,  # or MEAN_C
            thresholdType=cv.THRESH_BINARY,
            blockSize=11,  # size of the neighborhood (must be odd)
            C=2,  # constant subtracted from the mean
        )

    # Check if dilation and erosion needed
    # Find the margins and crop
    output_path = f"{base}-cv-processed.{ext}"
    cv.imwrite(output_path, img)
    return output_path