"""Interactively rename page images that are flagged for review.

Every file in ``directory`` whose name contains ``pattern`` is loaded as a
grayscale image and thresholded; two fixed regions of the result are shown
side by side in a window, and the operator is asked to type the page number.
The file is then renamed to ``<page number>.jpg``.  If a file with that name
already exists, the existing file is first moved aside to
``<page number>.review-<random>.jpg`` so that nothing is overwritten.
"""
import os

import cv2
import numpy
import pytesseract

directory = 'images'

# Substring that marks a file name as needing manual review.
pattern = 'review'  # Replace with your target substring

# Snapshot of the matching file names, taken once before any renaming starts,
# so files renamed during the loop do not change what is iterated.
matching_files = [f for f in os.listdir(directory) if pattern in f]

for filename in matching_files:
    path = os.path.join(directory, filename)
    print(path)

    # Flag 0 loads the image as grayscale.  cv2.imread returns None instead of
    # raising when the file is missing or cannot be decoded (for example when
    # an earlier iteration already renamed it), so check before using it.
    image = cv2.imread(path, 0)
    if image is None:
        print(f'could not read {path}, skipping')
        continue

    # Otsu threshold with inverted-binary output, then inverted again.
    thresh = 255 - cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Bottom 2 locations: rows 1383-1446, columns 77-153 and 950-1026.
    # NOTE(review): presumably where the page number is printed on the left
    # and right pages -- confirm against the scans.
    roi_left = thresh[1383:1447, 77:154]
    roi_right = thresh[1383:1447, 950:1027]
    if roi_left.size == 0 or roi_right.size == 0:
        # Slicing past the image bounds yields empty arrays, which cannot be
        # displayed; the image is smaller than the expected layout.
        print(f'{path} is too small for the expected regions, skipping')
        continue

    cv2.imshow('Review', cv2.hconcat([roi_left, roi_right]))
    # Give the GUI event loop a moment to draw the window before the script
    # blocks on input().
    cv2.waitKey(1)

    manpage = input("please input the page number").strip()
    if not manpage:
        # An empty answer would otherwise rename the file to '.jpg'.
        print(f'no page number given for {path}, skipping')
        continue

    target = os.path.join(directory, f'{manpage}.jpg')
    if target == path:
        # The file already has the requested name.  Moving it aside and then
        # renaming the (now missing) original would raise FileNotFoundError.
        continue

    if os.path.isfile(target):
        # Keep the file that currently owns the target name by moving it to a
        # new review name; it still contains ``pattern`` and will be picked up
        # on a later run.
        suffix = numpy.random.default_rng().random()
        os.rename(
            target,
            os.path.join(directory, f'{manpage}.review-{suffix}.jpg'))
    os.rename(path, target)

# Close the preview window once every file has been handled.
cv2.destroyAllWindows()