# Origin: commit f97c5d62d6b970eba67e18144a44e387f7bbf833
# Author: Alexandre Dulaunoy, Thu, 4 Aug 2016 14:32:50 +0200
# Subject: First version of an Optical Character Recognition (OCR) module
#          for MISP
# File: misp_modules/modules/import/ocr.py
"""Optical Character Recognition (OCR) import module for MISP.

The module receives a JSON request whose ``data`` field holds a
base64-encoded image, runs the image through ``pytesseract`` and returns
the recognised text as a single ``freetext`` result.
"""

import json
import base64

# Older PIL installs expose a top-level ``Image`` module; Pillow ships it
# inside the ``PIL`` package.
try:
    import Image
except ImportError:
    from PIL import Image

from pytesseract import image_to_string
from io import BytesIO

# Template for error responses; ``handler`` copies it rather than mutating it.
misperrors = {'error': 'Error'}
# Empty: the module declares no user-facing configuration.
userConfig = {}

inputSource = ['file']

moduleinfo = {'version': '0.1', 'author': 'Alexandre Dulaunoy',
              'description': 'Optical Character Recognition (OCR) module for MISP',
              'module-type': ['import']}

moduleconfig = []


def handler(q=False):
    """Run OCR on the base64-encoded image carried by a JSON request.

    Args:
        q: JSON document (as a string) containing a ``data`` key whose value
            is the base64-encoded image, or ``False`` when no request is
            supplied.

    Returns:
        ``False`` when ``q`` is ``False``; an error dict (a copy of
        ``misperrors`` with a specific message) when the request carries no
        image data; otherwise ``{'results': [...]}`` holding one ``freetext``
        entry with the text returned by ``image_to_string``.
    """
    if q is False:
        return False
    request = json.loads(q)
    encoded = request.get("data")
    if not encoded:
        # Report the problem through the error template instead of letting a
        # bare KeyError escape; the shared template itself stays untouched.
        return dict(misperrors, error='No image data provided for OCR')
    image_file = BytesIO(base64.b64decode(encoded))
    ocrized = image_to_string(Image.open(image_file))
    return {'results': [{'values': ocrized, 'type': 'freetext'}]}


def introspection():
    """Return the module setup: ``userConfig`` and ``inputSource``.

    Each entry is included only if the corresponding module-level name is
    defined, so either global can be removed without breaking introspection.
    """
    modulesetup = {}
    try:
        modulesetup['userConfig'] = userConfig
    except NameError:
        pass
    try:
        modulesetup['inputSource'] = inputSource
    except NameError:
        pass
    return modulesetup


def version():
    """Return ``moduleinfo`` with ``moduleconfig`` attached under ``config``."""
    moduleinfo['config'] = moduleconfig
    return moduleinfo


if __name__ == '__main__':
    # Manual smoke run against a local request file; the context manager
    # guarantees the file handle is closed.
    with open('test.json', 'r') as x:
        handler(q=x.read())