mirror of https://github.com/MISP/misp-modules
Modules for expansion services, import and export in MISP
http://misp.github.io/misp-modules
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
50 lines
1.4 KiB
50 lines
1.4 KiB
import json |
|
import binascii |
|
import cv2 |
|
import np |
|
import pytesseract |
|
|
|
# Error response template; handler() overwrites the 'error' message and
# returns this same dict when processing fails.
misperrors = {'error': 'Error'}

# Attribute types the module takes as input and produces as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Module metadata; version() attaches a 'config' entry before returning it.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': 'OCR decoder',
    'module-type': ['expansion'],
}

# No configuration options are declared for this module.
moduleconfig = []
|
|
|
|
|
def handler(q=False):
    """Run OCR on a base64-encoded image attachment and return its text.

    Args:
        q: JSON string with an ``attachment`` entry (used as the file name in
            the result comments) and a ``data`` entry holding the
            base64-encoded file content, or ``False`` when no query is given.

    Returns:
        ``False`` when ``q`` is ``False``. Otherwise a dict with a
        ``results`` list containing one ``freetext`` and one ``text`` entry
        with the recognized string, or the shared ``misperrors`` dict with
        its ``error`` message set when the payload cannot be read or
        analyzed.

    Raises:
        KeyError: If the query has no ``attachment`` entry.
    """
    if q is False:
        return False

    q = json.loads(q)
    filename = q['attachment']

    try:
        img_array = np.frombuffer(binascii.a2b_base64(q['data']), np.uint8)
    except Exception as e:
        print(e)
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        misperrors['error'] = err
        print(err)
        return misperrors

    try:
        # Decoding happens inside the try block so that an empty or corrupt
        # buffer yields the module's error response instead of an uncaught
        # exception.
        image = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if image is None:
            # imdecode signals an undecodable payload by returning None.
            raise ValueError("attachment could not be decoded as an image")
        decoded = pytesseract.image_to_string(image)
    except Exception as e:
        print(e)
        err = "Couldn't analyze file type. Only images are supported right now."
        misperrors['error'] = err
        return misperrors

    comment = "OCR from file " + filename
    return {'results': [{'types': ['freetext'], 'values': decoded, 'comment': comment},
                        {'types': ['text'], 'values': decoded, 'comment': comment}]}
|
|
|
|
|
def introspection():
    """Return the attribute types this module accepts and produces."""
    return mispattributes
|
|
|
|
|
def version():
    """Return the module metadata with the configuration options attached.

    The ``config`` entry of the shared ``moduleinfo`` dict is set to
    ``moduleconfig`` on every call before the dict is returned.
    """
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
|
|
|