diff --git a/misp_modules/modules/expansion/__init__.py b/misp_modules/modules/expansion/__init__.py
index 994b289..ec78e9b 100644
--- a/misp_modules/modules/expansion/__init__.py
+++ b/misp_modules/modules/expansion/__init__.py
@@ -8,4 +8,5 @@ __all__ = ['cuckoo_submit', 'vmray_submit', 'bgpranking', 'circl_passivedns', 'c
            'yara_syntax_validator', 'hashdd', 'onyphe', 'onyphe_full', 'rbl',
            'xforceexchange', 'sigma_syntax_validator', 'stix2_pattern_syntax_validator',
            'sigma_queries', 'dbl_spamhaus', 'vulners', 'yara_query', 'macaddress_io',
-           'intel471', 'backscatter_io', 'btc_scam_check', 'hibp', 'greynoise', 'macvendors', 'qrcode']
+           'intel471', 'backscatter_io', 'btc_scam_check', 'hibp', 'greynoise', 'macvendors',
+           'qrcode', 'ocr']
diff --git a/misp_modules/modules/expansion/ocr.py b/misp_modules/modules/expansion/ocr.py
new file mode 100644
index 0000000..afdf343
--- /dev/null
+++ b/misp_modules/modules/expansion/ocr.py
@@ -0,0 +1,75 @@
+import json
+import re
+import binascii
+import cv2
+import np
+import pytesseract
+
+misperrors = {'error': 'Error'}
+mispattributes = {'input': ['attachment'],
+                  'output': ['freetext', 'text']}
+moduleinfo = {'version': '0.1', 'author': 'Sascha Rommelfangen',
+              'description': 'OCR decoder',
+              'module-type': ['expansion']}
+
+moduleconfig = []
+
+
+def _fail(message):
+    """Print `message`, store it in the shared error dict and return that dict."""
+    print(message)
+    misperrors['error'] = message
+    return misperrors
+
+
+def handler(q=False):
+    """Extract text from an image attachment with Tesseract OCR.
+
+    `q` is the JSON request string. It must carry the file name under
+    'attachment' and the base64-encoded file content under 'data'.
+
+    Returns False when called without a request, a dict holding a 'results'
+    list (the recognised text as 'freetext' and as 'text') on success, or the
+    `misperrors` dict with 'error' set when the input is missing or unreadable.
+    """
+    if q is False:
+        return False
+    request = json.loads(q)
+    filename = request.get('attachment')
+    data = request.get('data')
+    fetch_error = ("Couldn't fetch attachment (JSON 'data' is empty). "
+                   "Are you using the 'Query enrichment' action?")
+    # Reject a missing name or payload up front: a missing key would raise
+    # KeyError, and an empty payload decodes to an empty buffer that
+    # cv2.imdecode() can reject with an exception.
+    if not filename or not data:
+        return _fail(fetch_error)
+    try:
+        img_array = np.frombuffer(binascii.a2b_base64(data), np.uint8)
+    except Exception as e:
+        print(e)
+        return _fail(fetch_error)
+
+    analyze_error = "Couldn't analyze file type. Only images are supported right now."
+    try:
+        image = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
+        if image is None:
+            # imdecode() reports undecodable data by returning None.
+            return _fail(analyze_error)
+        decoded = pytesseract.image_to_string(image)
+    except Exception as e:
+        print(e)
+        return _fail(analyze_error)
+    return {'results': [{'types': ['freetext'], 'values': decoded, 'comment': "OCR from file " + filename},
+                        {'types': ['text'], 'values': decoded, 'comment': "OCR from file " + filename}]}
+
+
+def introspection():
+    """Return the attribute types this module accepts and produces."""
+    return mispattributes
+
+
+def version():
+    """Return the module metadata, including its (empty) configuration list."""
+    moduleinfo['config'] = moduleconfig
+    return moduleinfo