Textbook-Arrange/main.py

41 lines
1.8 KiB
Python
Raw Normal View History

2024-04-29 20:25:13 +00:00
import cv2
import pytesseract
import os
2024-09-17 18:05:33 +00:00
import numpy
2024-04-29 20:25:13 +00:00
directory = 'images'
2024-09-17 18:17:16 +00:00
2024-04-29 20:25:13 +00:00
# Rename every scanned page in `directory` to "<page number>.jpg".
#
# The page number is OCR'd from two fixed regions near the bottom corners of
# each scan. When the OCR'd name is already taken by another file, the crop is
# shown on screen and the operator is asked to confirm or correct the number;
# the file that previously held the name is kept under a ".review-<random>"
# suffix so nothing is ever overwritten.
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    # Skip sub-directories, and entries that an earlier iteration already
    # renamed away (os.listdir returned its snapshot before any rename).
    if not os.path.isfile(f):
        continue

    image = cv2.imread(f, 0)
    if image is None:
        # cv2.imread signals an unreadable / non-image file by returning
        # None rather than raising, which would crash cv2.threshold below.
        print(f'skipping {f}: not a readable image')
        continue

    # Otsu binarisation; the inverse threshold followed by "255 -" yields
    # the same polarity as the input, i.e. dark text on a light background.
    thresh = 255 - cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # bottom 2 locations (left and right corner) where the page number can be.
    # NOTE(review): the fixed pixel windows assume every scan is at least
    # 1447 px tall and 1027 px wide -- confirm against the scanner settings.
    ROI1 = thresh[1383:1447, 77:154]
    ROI2 = thresh[1383:1447, 950:1027]
    ROI = cv2.hconcat([ROI1, ROI2])

    # --psm 7 treats the crop as a single text line; the whitelist limits
    # the result to digits plus 'i' (presumably roman-numeral front matter).
    # Strip the trailing newline / form feed tesseract appends so it does not
    # end up inside the file name.
    data = pytesseract.image_to_string(ROI, lang='eng', config='--psm 7 -c tessedit_char_whitelist=0123456789i').strip()
    print(data)

    if not data:
        # Nothing recognised: leave the file alone instead of naming it ".jpg".
        print(f'skipping {f}: no page number recognised')
        continue

    if filename == f'{data}.jpg':
        # Already carries the OCR'd name; it must not collide with itself.
        continue

    target = os.path.join(directory, f'{data}.jpg')
    if not os.path.isfile(target):
        os.rename(f, target)
        continue

    # Name collision: let the operator look at the crop and decide.
    cv2.imshow(data, ROI)
    cv2.waitKey(1)
    manpage = input("please input the number on the page (if the title is correct, enter nothing)").strip()

    # input() never includes the trailing newline, so an empty reply is ""
    # and means "the OCR'd number is correct".
    page = manpage or data
    if filename != f'{page}.jpg':
        target = os.path.join(directory, f'{page}.jpg')
        # check if the new name already exists; if so park the current
        # holder under a random review suffix before taking the name.
        if os.path.isfile(target):
            rng = numpy.random.default_rng().random()
            os.rename(target, os.path.join(directory, f'{page}.review-{rng}.jpg'))
        os.rename(f, target)
    cv2.destroyAllWindows()

cv2.destroyAllWindows()