update for tesseract

This commit is contained in:
2022-01-03 10:07:33 +05:30
parent fae9e555dd
commit 404c31b0ed
3 changed files with 43 additions and 15 deletions

View File

@ -1,6 +1,7 @@
asgiref==3.4.1
cycler==0.11.0
Django==3.2.9
djangorestframework==3.12.4
easyocr==1.4.1
fonttools==4.28.2
imageio==2.9.0
@ -12,6 +13,7 @@ opencv-python-headless==4.5.4.60
packaging==21.3
Pillow==8.2.0
pyparsing==3.0.6
pytesseract==0.3.8
python-bidi==0.4.2
python-dateutil==2.8.2
pytz==2021.3

View File

@ -5,6 +5,11 @@ from rest_framework.response import Response
from rest_framework import status
import easyocr
import cv2
import pytesseract
from pytesseract import Output
import json
def UploaderView(request):
    """Render the OCR upload test page for the incoming request."""
    template_name = 'sumasen_easyocr/index.html'
    return render(request, template_name)
@ -12,21 +17,38 @@ class FileUploadView(APIView):
parser_classes = [MultiPartParser,]
def post(self, request, format='jpg'):
    """Save the uploaded image to disk and return its OCR result.

    Reads the ``engine`` form field and the ``file`` upload from the
    request. When ``engine`` equals ``'EasyOCR'`` the saved image is read
    with EasyOCR using the ``ja`` and ``en`` languages; for any other
    value it is read with Tesseract (``lang='jpn'``, ``--oem 3 --psm 6``).

    Args:
        request: The incoming request carrying the multipart form data.
        format: Unused by this method; kept for interface compatibility.

    Returns:
        A ``Response`` with status 201 and the OCR output on success, or
        a ``Response`` with status 400 and an ``Error`` payload when the
        file part is missing or recognition fails.
    """
    ocr = request.POST.get("engine", None)

    # A missing "file" part used to raise an unhandled lookup error;
    # report it to the client as a bad request instead.
    up_file = request.FILES.get('file')
    if up_file is None:
        return Response({'Error': "No file uploaded"}, status.HTTP_400_BAD_REQUEST)

    # Build the storage path once so the write and both OCR engines agree.
    # NOTE(review): this absolute path is machine-specific, and there is no
    # separator between "uploaded" and the file name, so files land next to
    # (not inside) an "uploaded" directory -- confirm whether that is
    # intended. Alternative location used previously:
    # '/home/sumasen/django_easyocr_api/orc_api/sumasen_easyocr/uploaded'
    upload_path = (
        '/Users/mohamednouffer/workspace/akira_san/sumasen_ocr/sumasen_easyocr/uploaded'
        + up_file.name
    )

    # The context manager closes the file even if a chunk write fails.
    with open(upload_path, 'wb+') as destination:
        for chunk in up_file.chunks():
            destination.write(chunk)

    try:
        if ocr == 'EasyOCR':
            reader = easyocr.Reader(['ja', 'en'])
            result = reader.readtext(upload_path)
        else:
            img = cv2.imread(upload_path)
            custom_config = r'--oem 3 --psm 6'
            res = pytesseract.image_to_data(
                img, lang='jpn', config=custom_config, output_type=Output.DICT
            )
            # NOTE(review): the payload is serialized to a JSON string
            # before being handed to Response, so the client likely
            # receives a doubly encoded value -- kept as-is because
            # existing clients may already depend on it; confirm.
            result = json.dumps(res)
    except Exception as e:
        # Any OCR failure is reported as a 400, never as a 201.
        print(e)
        return Response({'Error': "Error occured"}, status.HTTP_400_BAD_REQUEST)

    return Response(result, status.HTTP_201_CREATED)

View File

@ -9,10 +9,14 @@
</style>
</head>
{% comment %} http://aiworks.intranet.sumasen.net {% endcomment %}
<body>
<h2>Sumasen OCR file upload test</h2>
<form action="http://aiworks.intranet.sumasen.net:8600/api/v1/upload/" method="post" enctype="multipart/form-data">
<form action="http://localhost:8000/api/v1/upload/" method="post" enctype="multipart/form-data">
<select name="engine">
<option>EasyOCR</option>
<option>Tesseract</option>
</select><br><br>
<input type="file" name="file" id="file_to_upload">
<hr>
<input type="submit" value="Upload To Server" id="upload_file_button">