Textbook-Arrange/main.py

21 lines
706 B
Python

import cv2
import pytesseract
import os
directory = 'images'

# Crop windows (pixel coordinates) for the "bottom 2 locations" that are OCR'd.
# NOTE(review): presumably the bottom-left / bottom-right corners where the
# printed page number sits on a scan of fixed size -- confirm against the
# actual scans.
PAGE_NUMBER_ROWS = slice(1383, 1447)
LEFT_COLUMNS = slice(77, 154)
RIGHT_COLUMNS = slice(950, 1027)

# '--psm 7' asks Tesseract to treat the crop as a single text line; the
# whitelist restricts recognition to digits.
TESSERACT_CONFIG = '--psm 7 -c tessedit_char_whitelist=0123456789'

ASCII_DIGITS = '0123456789'


def read_page_label(image_path):
    """Run OCR on the two bottom crop windows of one image.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The raw text returned by Tesseract (it may carry trailing
        whitespace), or None when the file cannot be decoded as an image
        or is too small to contain both crop windows.
    """
    image = cv2.imread(image_path, 0)  # flag 0 loads the image as grayscale
    if image is None:
        # cv2.imread signals "not an image / unreadable" by returning None.
        return None

    # Otsu picks the threshold automatically; inverting the inverted binary
    # image yields a plain black-and-white picture for Tesseract.
    thresh = 255 - cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    left = thresh[PAGE_NUMBER_ROWS, LEFT_COLUMNS]
    right = thresh[PAGE_NUMBER_ROWS, RIGHT_COLUMNS]
    if left.size == 0 or right.size == 0:
        # Slicing past the image border silently yields an empty array.
        return None

    # Only one of the two windows is expected to contain digits, so both are
    # glued side by side and read as a single line.
    roi = cv2.hconcat([left, right])
    return pytesseract.image_to_string(
        roi, lang='eng', config=TESSERACT_CONFIG)


def build_target_path(source_path, ocr_text):
    """Compute the new path for an image from its OCR result.

    Args:
        source_path: Current path of the image.
        ocr_text: Raw OCR output for that image.

    Returns:
        A path in the same directory as ``source_path`` named after the
        digits found in ``ocr_text``, keeping the source file extension
        ('.jpg' when the source has none); None when ``ocr_text`` contains
        no digits.
    """
    # Keep digits only: drops the trailing newline/form feed Tesseract adds
    # and anything that is unsafe in a file name.
    digits = ''.join(ch for ch in ocr_text if ch in ASCII_DIGITS)
    if not digits:
        return None
    folder = os.path.dirname(source_path)
    extension = os.path.splitext(source_path)[1] or '.jpg'
    return os.path.join(folder, digits + extension)


def rename_by_page_number(image_directory):
    """Rename every image in a directory to the number OCR'd from it.

    Files that are not readable images, yield no digits, or whose target
    name is already taken are left untouched and reported on stdout.

    Args:
        image_directory: Directory whose regular files are processed.
            Sub-directories are ignored.

    Returns:
        A list of ``(old_path, new_path)`` tuples, one per renamed file.
    """
    renamed = []
    # Sorted so that runs are reproducible regardless of filesystem order.
    for filename in sorted(os.listdir(image_directory)):
        source = os.path.join(image_directory, filename)
        if not os.path.isfile(source):
            continue

        ocr_text = read_page_label(source)
        if ocr_text is None:
            print(f'skipping {source}: not a readable image or too small')
            continue

        target = build_target_path(source, ocr_text)
        if target is None:
            print(f'skipping {source}: no digits recognised')
            continue
        if target == source:
            continue
        if os.path.exists(target):
            # os.rename would silently replace the existing file on POSIX.
            print(f'skipping {source}: {target} already exists')
            continue

        print(f'{source} -> {target}')
        os.rename(source, target)
        renamed.append((source, target))
    return renamed


if __name__ == '__main__':
    rename_by_page_number(directory)