-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_image_enhance.py
More file actions
154 lines (125 loc) · 5.42 KB
/
check_image_enhance.py
File metadata and controls
154 lines (125 loc) · 5.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import cv2
import numpy as np
from matplotlib import pyplot as plt
def is_image_good_for_ocr(image):
    """Report whether an image already clears every OCR quality threshold.

    Three metrics are measured and echoed to stdout as they are computed:
    the intensity range (max minus min), the variance of the Laplacian
    response, and the standard deviation of the pixel values.  The image
    passes only when each metric is strictly greater than its threshold.

    Args:
        image: Image array accepted by ``cv2.Laplacian``; presumably the
            single-channel grayscale array loaded by the caller (TODO confirm).

    Returns:
        A truthy value when all three metrics exceed their thresholds,
        a falsy value otherwise.
    """
    # Example cut-offs; each metric must be strictly above its limit.
    min_contrast = 250
    min_sharpness = 4810
    min_noise = 50

    # Spread between the darkest and the brightest pixel.
    darkest, brightest = image.min(), image.max()
    spread = brightest - darkest
    print("image contrast value")
    print(spread)

    # Variance of the Laplacian response serves as the sharpness score.
    edge_response = cv2.Laplacian(image, cv2.CV_64F)
    sharpness = edge_response.var()
    print("image laplacian value")
    print(sharpness)

    # Standard deviation of the pixel values is used as the "noise" score.
    # NOTE(review): a *higher* deviation counts towards "good" here --
    # confirm that this is the intended direction of the check.
    deviation = image.std()
    print("image noise value")
    print(deviation)

    contrast_ok = spread > min_contrast
    sharpness_ok = sharpness > min_sharpness
    noise_ok = deviation > min_noise

    print("final result")
    report = (
        ("contrast > contrast_threshold : ", contrast_ok),
        ("laplacian > sharpness_threshold :", sharpness_ok),
        ("noise > noise_threshold : ", noise_ok),
    )
    for label, passed in report:
        print(label, passed)

    verdict = contrast_ok and sharpness_ok and noise_ok
    print(verdict)
    return verdict
def deskew_image(image):
    """Rotate an image so that its content is aligned with the axes.

    The image is Otsu-binarised (inverted, so dark content becomes
    foreground), the minimum-area rectangle around all foreground pixels is
    computed, and the image is rotated about its centre by the angle that
    brings that rectangle back to the nearest axis.

    Args:
        image: Image array accepted by ``cv2.threshold`` with the Otsu flag;
            presumably a single-channel 8-bit grayscale array (TODO confirm).

    Returns:
        A new array with the same width and height as ``image`` holding the
        rotated picture.  When no foreground pixel is found, an unrotated
        copy of ``image`` is returned.
    """
    # Convert to an inverted binary image: content -> 255, background -> 0.
    _, binary = cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # Coordinates of all non-zero (foreground) pixels.
    coords = np.column_stack(np.where(binary > 0))
    if coords.shape[0] == 0:
        # Blank page: there is no content to fit a rectangle around.
        return image.copy()

    # Angle of the rotated bounding box around the content.
    rect_angle = cv2.minAreaRect(coords)[-1]
    print("angle")
    print(rect_angle)

    # The reported angle lies in [-90, 0) on older OpenCV releases and in
    # (0, 90] on newer ones.  Folding it into (-45, 45] gives the smallest
    # rotation that squares the box up under either convention.  The result
    # equals the former ``-(90 + angle)`` for angles below -45, and also
    # corrects the remaining angles, which used to be forced to 0 (so small
    # skews were never fixed).
    angle = 45.0 - (rect_angle + 45.0) % 90.0
    print("after angle")
    print(angle)

    # Rotate the image about its centre to deskew it.
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    deskewed = cv2.warpAffine(
        image,
        M,
        (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return deskewed
def preprocess_image(image):
    """Apply one of four enhancement recipes chosen from image statistics.

    The intensity range, Laplacian variance and pixel standard deviation are
    measured up front.  The first metric that exceeds its threshold (checked
    in the order contrast, sharpness, noise) selects the recipe; if none
    does, a default contrast adjustment plus binarisation is applied.

    Args:
        image: Image array accepted by the OpenCV calls below; presumably a
            single-channel 8-bit grayscale array (TODO confirm).

    Returns:
        The processed image: a binarised image for every recipe except the
        noise recipe, which returns the Gaussian-blurred image.
    """

    def _binarize(source, brightness, block_size, offset):
        # Shift brightness (gain stays at 1.0), then threshold adaptively
        # with a Gaussian-weighted neighbourhood.
        boosted = cv2.convertScaleAbs(source, alpha=1.0, beta=brightness)
        return cv2.adaptiveThreshold(
            boosted,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            block_size,
            offset,
        )

    # Example cut-offs that decide which recipe runs.
    contrast_limit = 250
    sharpness_limit = 4810
    noise_limit = 50

    # All three statistics are gathered before any branch is taken.
    deviation = image.std()
    spread = image.max() - image.min()
    sharpness = cv2.Laplacian(image, cv2.CV_64F).var()

    # NOTE(review): the original comment on this branch read "if image
    # contrast is not good", yet it fires when the range is ABOVE the
    # threshold -- confirm which one is intended.
    if spread > contrast_limit:
        return _binarize(image, 10, 3, 1.3)

    if sharpness > sharpness_limit:
        # Smooth first, then run the 3x3 cross-shaped kernel over the result.
        smoothed = cv2.GaussianBlur(image, (5, 5), 1)
        cross_kernel = np.array(
            [
                [0, -1, 0],
                [-1, 4.20, -1],
                [0, -1, 0],
            ]
        )
        filtered = cv2.filter2D(smoothed, -1, cross_kernel)
        return _binarize(filtered, 20, 19, 1.5)

    if deviation > noise_limit:
        # Only a Gaussian blur is applied on this path; no binarisation.
        return cv2.GaussianBlur(image, (5, 5), 1)

    # Fallback: brightness shift followed by adaptive thresholding.
    return _binarize(image, 20, 9, 1.1)
def main(image_path, output_path):
    """Deskew an image, enhance it if required, save it and plot the result.

    The image is loaded as grayscale and deskewed.  If the deskewed image
    already passes ``is_image_good_for_ocr`` it is saved as is; otherwise it
    is run through ``preprocess_image`` first.  Finally the loaded image and
    the saved result are shown side by side with matplotlib.

    Args:
        image_path: Path of the image file to read.
        output_path: Path the resulting image is written to.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    # Load the image as single-channel grayscale.
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; fail here with a message naming the path rather
        # than with an unrelated error further down the pipeline.
        raise FileNotFoundError("Could not read image: %r" % (image_path,))

    # Deskew the image.
    deskewed_image = deskew_image(image)

    if is_image_good_for_ocr(deskewed_image):
        print("Image is good for OCR")
        preprocessed_image = deskewed_image
    else:
        print("Image needs preprocessing")
        preprocessed_image = preprocess_image(deskewed_image)

    # Save whichever image was selected above.  cv2.imwrite returns False
    # when the file could not be written, so report that instead of losing
    # the output silently; the comparison plot is still shown.
    if not cv2.imwrite(output_path, preprocessed_image):
        print("Failed to write image to %r" % (output_path,))

    # Show the images.  The grid has three columns and only the first two
    # are used, which keeps the figure layout unchanged.
    plt.figure(figsize=[18, 5])
    plt.subplot(131)
    plt.imshow(image, cmap='gray')
    plt.title("Original")
    plt.subplot(132)
    plt.imshow(preprocessed_image, cmap='gray')
    plt.title("Preprocessed")
    plt.show()
# Example usage.  Guarded so that the pipeline (file I/O plus a plot window)
# runs only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main('images/Invoice(3).jpg', 'images/output_image.jpg')