From 404c31b0ed0332b985de493c2fee0b7eb3dd1fba Mon Sep 17 00:00:00 2001 From: Mohamed Nouffer Date: Mon, 3 Jan 2022 10:07:33 +0530 Subject: [PATCH] update for tesseract --- requirements.txt | 2 ++ sumasen_easyocr/views.py | 48 ++++++++++++++++++++-------- templates/sumasen_easyocr/index.html | 8 +++-- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index c765219..430c559 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ asgiref==3.4.1 cycler==0.11.0 Django==3.2.9 +djangorestframework==3.12.4 easyocr==1.4.1 fonttools==4.28.2 imageio==2.9.0 @@ -12,6 +13,7 @@ opencv-python-headless==4.5.4.60 packaging==21.3 Pillow==8.2.0 pyparsing==3.0.6 +pytesseract==0.3.8 python-bidi==0.4.2 python-dateutil==2.8.2 pytz==2021.3 diff --git a/sumasen_easyocr/views.py b/sumasen_easyocr/views.py index c0c88c6..2a11604 100644 --- a/sumasen_easyocr/views.py +++ b/sumasen_easyocr/views.py @@ -5,6 +5,11 @@ from rest_framework.response import Response from rest_framework import status import easyocr +import cv2 +import pytesseract +from pytesseract import Output +import json + def UploaderView(request): return render(request, 'sumasen_easyocr/index.html') @@ -12,21 +17,38 @@ class FileUploadView(APIView): parser_classes = [MultiPartParser,] def post(self, request, format='jpg'): + ocr = request.POST.get("engine", None) + up_file = request.FILES['file'] - destination = open('/home/sumasen/django_easyocr_api/orc_api/sumasen_easyocr/uploaded' + up_file.name, 'wb+') + destination = open('/Users/mohamednouffer/workspace/akira_san/sumasen_ocr/sumasen_easyocr/uploaded' + up_file.name, 'wb+') + #destination = open('/home/sumasen/django_easyocr_api/orc_api/sumasen_easyocr/uploaded' + up_file.name, 'wb+') for chunk in up_file.chunks(): destination.write(chunk) destination.close() - try: - reader = easyocr.Reader(['ja','en']) - result = reader.readtext('/home/sumasen/django_easyocr_api/orc_api/sumasen_easyocr/uploaded' + up_file.name) - #print('@@@@@@@@@@') - #print(result) - #print('@@@@@@@@@@') - return Response(result, status.HTTP_201_CREATED) - #return Response({''}, status.HTTP_201_CREATED) - - except Exception as e: - print(e) - return Response({'Error': "Error occured"}, status.HTTP_201_CREATED) \ No newline at end of file + if ocr == 'EasyOCR': + try: + reader = easyocr.Reader(['ja','en']) + #result = reader.readtext('/home/sumasen/django_easyocr_api/orc_api/sumasen_easyocr/uploaded' + up_file.name) + result = reader.readtext('/Users/mohamednouffer/workspace/akira_san/sumasen_ocr/sumasen_easyocr/uploaded' + up_file.name) + #print('@@@@@@@@@@') + #print(result) + #print('@@@@@@@@@@') + return Response(result, status.HTTP_201_CREATED) + #return Response({''}, status.HTTP_201_CREATED) + + except Exception as e: + print(e) + return Response({'Error': "Error occured"}, status.HTTP_400_BAD_REQUEST) + else: + try: + + img = cv2.imread('/Users/mohamednouffer/workspace/akira_san/sumasen_ocr/sumasen_easyocr/uploaded' + up_file.name) + custom_config = r'--oem 3 --psm 6' + res = pytesseract.image_to_data(img, lang='jpn', config=custom_config, output_type=Output.DICT) + app_json = json.dumps(res) + return Response(app_json, status.HTTP_201_CREATED) + + except Exception as e: + print(e) + return Response({'Error': "Error occured"}, status.HTTP_400_BAD_REQUEST) \ No newline at end of file diff --git a/templates/sumasen_easyocr/index.html b/templates/sumasen_easyocr/index.html index 4710b1d..3b82430 100644 --- a/templates/sumasen_easyocr/index.html +++ b/templates/sumasen_easyocr/index.html @@ -9,10 +9,14 @@ - +{% comment %} http://aiworks.intranet.sumasen.net {% endcomment %}

Sumase OCR file upload test

-
+ +