From 60a3fbe28204c5178b2bceed2b3551a27e8c6ce4 Mon Sep 17 00:00:00 2001 From: Steve Clement Date: Thu, 28 Jun 2018 23:20:38 +0800 Subject: [PATCH] - added wand requirement - fixed missing return png byte-stream - move module import to handler to catch and report errorz --- REQUIREMENTS | 1 + misp_modules/modules/import_mod/ocr.py | 41 +++++++++++++++++++------- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/REQUIREMENTS b/REQUIREMENTS index 9404855..c116763 100644 --- a/REQUIREMENTS +++ b/REQUIREMENTS @@ -14,6 +14,7 @@ git+https://github.com/MISP/PyMISP.git#egg=pymisp git+https://github.com/sebdraven/pyonyphe#egg=pyonyphe pillow pytesseract +wand SPARQLWrapper domaintools_api pygeoip diff --git a/misp_modules/modules/import_mod/ocr.py b/misp_modules/modules/import_mod/ocr.py index 0748d35..a30bba0 100755 --- a/misp_modules/modules/import_mod/ocr.py +++ b/misp_modules/modules/import_mod/ocr.py @@ -1,12 +1,5 @@ import json import base64 -import magic - -from PIL import Image - -from wand.image import Image as WImage - -from pytesseract import image_to_string from io import BytesIO misperrors = {'error': 'Error'} userConfig = { }; @@ -21,6 +14,32 @@ moduleconfig = [] def handler(q=False): + # try to import modules and return errors if module not found + try: + import magic + except ImportError: + misperrors['error'] = "Please pip(3) install magic" + return misperrors + + try: + from PIL import Image + except ImportError: + misperrors['error'] = "Please pip(3) install pillow" + return misperrors + + try: + # Official ImageMagick module + from wand.image import Image as WImage + except ImportError: + misperrors['error'] = "Please pip(3) install wand" + return misperrors + + try: + from pytesseract import image_to_string + except ImportError: + misperrors['error'] = "Please pip(3) install pytesseract" + return misperrors + if q is False: return False r = {'results': []} @@ -32,14 +51,16 @@ def handler(q=False): pages=len(pdf.sequence) img = WImage(width=pdf.width, height=pdf.height * pages) for p in range(pages): - img.composite(pdf.sequence[p], top=pdf.height * p, left=0) - image = document + image = img.composite(pdf.sequence[p], top=pdf.height * p, left=0) + image = img.make_blob('png') + else: + image = document image_file = BytesIO(image) image_file.seek(0) try: - im = WImage(blob=image_file) + im = Image.open(image_file) except IOError: misperrors['error'] = "Corrupt or not an image file." return misperrors