"""Rename scanned page images after the page number OCR'd from each scan.

For every regular file in ``directory`` the script crops two fixed regions
of the image, runs Tesseract on them and renames the file to
``<directory>/<number>-``.  When that name is already taken (or nothing
could be read) the cropped region is shown and the user is asked to confirm
or correct the number; a file that already holds the chosen name is moved
aside to ``<name>.review-<random float>`` so nothing is overwritten.
"""
import os

import cv2
import numpy
import pytesseract

directory = 'images'

# Fixed crop regions, in pixels.  The original comment calls these the
# "bottom 2 locations" -- presumably the bottom-left and bottom-right
# page-number positions of the scans (TODO confirm against the scan layout).
_ROI_ROWS = slice(1383, 1447)
_ROI_LEFT_COLS = slice(77, 154)
_ROI_RIGHT_COLS = slice(950, 1027)

# --psm 7 makes Tesseract treat the crop as a single text line; recognised
# characters are restricted to the digits plus 'i'.
_TESSERACT_CONFIG = '--psm 7 -c tessedit_char_whitelist=0123456789i'


def _ocr_page_number(path):
    """Return ``(text, roi)`` for the image at *path*, or ``None``.

    ``text`` is the whitespace-stripped OCR result (possibly empty) and
    ``roi`` is the thresholded crop that was fed to Tesseract.  ``None`` is
    returned when OpenCV cannot decode the file or the image is too small
    to contain both crop regions.
    """
    image = cv2.imread(path, 0)  # flag 0: load as grayscale
    if image is None:
        # cv2.imread signals "not an image / unreadable" with None.
        return None

    height, width = image.shape[:2]
    if height < _ROI_ROWS.stop or width < _ROI_RIGHT_COLS.stop:
        # Slicing would yield truncated or empty crops that hconcat rejects.
        return None

    # Otsu threshold; subtracting the inverse-binary mask from 255 flips it
    # back so the result equals a plain binary threshold of the image.
    thresh = 255 - cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]

    roi = cv2.hconcat([
        thresh[_ROI_ROWS, _ROI_LEFT_COLS],
        thresh[_ROI_ROWS, _ROI_RIGHT_COLS],
    ])
    text = pytesseract.image_to_string(
        roi, lang='eng', config=_TESSERACT_CONFIG
    )
    # Tesseract output ends with newline/form-feed characters; they must not
    # leak into file names.
    return text.strip(), roi


def _set_aside_for_review(path):
    """Rename *path* to ``<path>.review-<random float>`` to free its name."""
    rng = numpy.random.default_rng().random()
    os.rename(path, f'{path}.review-{rng}')


# os.listdir returns a snapshot, so files renamed below are not revisited
# under their new names during this run.
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    # A listed file may have been moved aside earlier in this run; such
    # entries (and sub-directories) are skipped here.
    if not os.path.isfile(f):
        continue

    result = _ocr_page_number(f)
    if result is None:
        print(f'skipping {f}: not a readable image of the expected size')
        continue
    data, ROI = result
    print(data)

    target = os.path.join(directory, f'{data}-')
    if target == f:
        # Already carries its OCR'd name (e.g. on a re-run); treating this
        # as a collision would move the file aside and then fail to find it.
        continue

    if data and not os.path.isfile(target):
        os.rename(f, target)
        continue

    # Name collision or empty OCR result: let the user look at the crop and
    # confirm or correct the number.
    cv2.imshow(data or filename, ROI)
    cv2.waitKey(1)  # give the GUI a chance to draw the window
    manpage = input(
        "please input the number on the page (if the title is correct, enter nothing)"
    ).strip()
    cv2.destroyAllWindows()

    # input() strips the trailing newline, so "enter nothing" arrives as an
    # empty string; in that case fall back to the OCR result.
    page = manpage or data
    if not page:
        print(f'skipping {f}: no page number recognised or entered')
        continue

    target = os.path.join(directory, f'{page}-')
    if target == f:
        continue
    # check if the new name already exists
    if os.path.isfile(target):
        _set_aside_for_review(target)
    os.rename(f, target)

cv2.destroyAllWindows()