Compare commits

...

19 Commits
v0.1 ... main

Author SHA1 Message Date
Kabooshki 1418f3954b
add to gitignore 2024-09-18 15:48:22 -05:00
Kabooshki e0fe811463
removed debug function 2024-09-18 12:19:58 -05:00
Kabooshki 37082b6f3a
added script to review files flagged for uncertainty 2024-09-18 11:02:38 -05:00
Kabooshki a54b43e1f5
removed k var 2024-09-17 13:17:16 -05:00
Kabooshki b03b1aaf14
Merge branch 'fixer' 2024-09-17 13:16:39 -05:00
Kabooshki 81f331b77e
added comment 2024-09-17 13:14:31 -05:00
Kabooshki 801277ce2b
Merge branch 'fixer' 2024-09-17 13:06:34 -05:00
Kabooshki 9aaeb28ffc
added function to review duplicates 2024-09-17 13:05:33 -05:00
Kabooshki 1040e754e5
reverted 2024-09-17 13:04:49 -05:00
Kabooshki 3a558db188
reverted 2024-09-17 13:03:34 -05:00
Kabooshki cc23e1df33
Merge branch 'fixer' 2024-09-17 13:00:14 -05:00
Kabooshki cf8af5ab22
removed whitespace 2024-09-17 12:57:14 -05:00
Kabooshki d7d4bf51e0
todo edit 2024-09-17 12:54:22 -05:00
Kabooshki e986b8dd00 removed obsolete iterator 2024-09-17 12:04:41 -05:00
Kabooshki f22510d186 added a review function when duplicates are detected 2024-09-17 12:02:13 -05:00
Kabooshki 84c4122d5f new branch 2024-09-17 11:04:53 -05:00
Kabooshki d371bd0492 changed gitignore 2024-09-17 11:01:25 -05:00
Kabooshki b284314192 modified gitignore 2024-09-17 11:01:14 -05:00
Kabooshki 23bda32298 remove .DS_Store 2024-09-17 11:00:17 -05:00
5 changed files with 54 additions and 9 deletions

BIN
.DS_Store vendored

Binary file not shown.

4
.gitignore vendored
View File

@ -1,3 +1,7 @@
images*
test.py
1.jpg
test-files.tbkpf
".DS_Store"
.DS_Store
compile*

3
TODO
View File

@ -1,2 +1 @@
improve accuracy
improve accuracy

30
fixer.py Normal file
View File

@ -0,0 +1,30 @@
import cv2
import pytesseract
import os
import numpy
# Manual review pass: show the two bottom regions of every image in
# `directory` whose name contains `pattern`, ask the operator for the page
# number, and rename the image to "<page number>.jpg".
directory = 'images'
pattern = 'review'  # substring that marks a file as needing manual review

# Snapshot the flagged files up front so the renames performed inside the
# loop cannot affect what is being iterated.
matching_files = [f for f in os.listdir(directory) if pattern in f]

# One generator for the whole run; it only supplies the random suffix given
# to a file that has to be moved out of the way.
rng_source = numpy.random.default_rng()

for files in matching_files:
    f = os.path.join(directory, files)
    print(f)

    image = cv2.imread(f, 0)  # flag 0: load as grayscale
    if image is None:
        # cv2.imread signals an unreadable/undecodable file by returning
        # None rather than raising, so guard before thresholding.
        print(f'could not read {f}, skipping')
        continue

    # Otsu threshold with THRESH_BINARY_INV, then inverted again via 255 - x.
    thresh = 255 - cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # bottom 2 locations (presumably where the page number is printed on a
    # left-hand or right-hand page -- confirm against the scans)
    ROI1 = thresh[1383:1447, 77:154]
    ROI2 = thresh[1383:1447, 950:1027]
    ROI = cv2.hconcat([ROI1, ROI2])

    cv2.imshow('Review', ROI)
    cv2.waitKey(1)  # give the GUI a chance to draw before blocking on input()

    manpage = input("please input the page number").strip()
    if not manpage:
        # An empty answer would otherwise rename the scan to "images/.jpg".
        print(f'no page number entered, leaving {f} untouched')
        continue

    target = os.path.join(directory, f'{manpage}.jpg')
    if os.path.isfile(target):
        # The name is already taken: move the existing file aside under a
        # new "review" name so it is picked up by a later review run.
        rng = rng_source.random()
        os.rename(target, os.path.join(directory, f'{manpage}.review-{rng}.jpg'))
    os.rename(f, target)

# Close the preview window once every flagged file has been handled.
cv2.destroyAllWindows()

26
main.py
View File

@ -1,9 +1,9 @@
import cv2
import pytesseract
import os
import numpy
directory = 'images'
k = 1
# iterate over files in
# that directory
for filename in os.listdir(directory):
@ -20,9 +20,21 @@ for filename in os.listdir(directory):
print(data)
if os.path.isfile(f'images/{data}.jpg') == False:
os.rename(f'{f}', f'images/{data}.jpg')
else:
cv2.imshow(ROI)
cv2.waitKey(50)
os.rename(f'{f}', f'images/{data}.{k}.jpg')
k = k + 1
elif os.path.isfile(f'images/{data}.jpg') == True:
cv2.imshow(data, ROI)
cv2.waitKey(1)
manpage = input("please input the number on the page (if the title is correct, enter nothing)")
#check if the new name already exists
if manpage == "\n":
rng = numpy.random.default_rng().random()
os.rename (f'images/{data}.jpg', f'images/{data}.review-{rng}.jpg')
os.rename(f'{f}', f'images/{data}.jpg')
elif manpage != "\n":
if os.path.isfile(f'images/{manpage}.jpg') == True:
rng = numpy.random.default_rng().random()
os.rename (f'images/{manpage}.jpg', f'images/{manpage}.review-{rng}.jpg')
os.rename(f'{f}', f'images/{manpage}.jpg')
elif os.path.isfile(f'images/{manpage}.jpg') == False:
os.rename(f'{f}', f'images/{manpage}.jpg')
cv2.destroyAllWindows()
cv2.destroyAllWindows()