introduction of new modules

pull/302/head
Sascha Rommelfangen 2019-04-26 12:07:55 +02:00
parent 61961c954b
commit f55d7946df
6 changed files with 326 additions and 0 deletions

View File

@ -0,0 +1,61 @@
import json
import binascii
import np
import docx
import io
# Default error payload: a dict carrying a single 'error' message.
misperrors = {'error': 'Error'}

# Attribute types this module takes as input and emits as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Static module metadata; version() adds the 'config' key to it.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.docx to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# This module exposes no configuration options.
moduleconfig = []
def handler(q=False):
    """Extract the text of a base64-encoded .docx attachment.

    Args:
        q: JSON request string with the keys 'attachment' (file name) and
            'data' (base64-encoded file content), or False.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list
        holding one 'freetext' and one 'text' entry with the extracted
        text, or a dict with a single 'error' message on failure.

    Raises:
        KeyError: if the request has no 'attachment' key.
    """
    if q is False:
        return False
    request = json.loads(q)
    filename = request['attachment']
    try:
        # BytesIO only needs bytes, so decode directly instead of wrapping
        # the payload in an intermediate array first.
        payload = binascii.a2b_base64(request['data'])
    except Exception as e:
        print(e)
        payload = b''
    if not payload:
        # Covers a missing key, undecodable input and a genuinely empty
        # payload, which is what the message below describes.
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # A fresh dict keeps one request's error text out of shared state.
        return {'error': err}
    doc_file = io.BytesIO(payload)
    try:
        doc = docx.Document(doc_file)
        parts = []
        for para in doc.paragraphs:
            print(para.text)
            parts.append(para.text)
        # Table cells hold their own paragraphs; they are not part of
        # doc.paragraphs and are appended after the body text.
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for para in cell.paragraphs:
                        print(para.text)
                        parts.append(para.text)
        # Every paragraph is preceded by a newline, including the first.
        doc_content = "".join("\n" + text for text in parts)
        print(doc_content)
        comment = ".docx-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': doc_content, 'comment': comment},
                            {'types': ['text'], 'values': doc_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .docx. Error was: " + str(e)}
def introspection():
    """Return the input and output attribute types declared by this module."""
    return mispattributes
def version():
    """Return the module metadata with its configuration options attached.

    The option list is stored under the 'config' key of moduleinfo, which
    is updated in place and then returned.
    """
    moduleinfo.update(config=moduleconfig)
    return moduleinfo

View File

@ -0,0 +1,56 @@
import json
import binascii
import np
import ezodf
import pandas_ods_reader
import io
# Default error payload: a dict carrying a single 'error' message.
misperrors = {'error': 'Error'}

# Attribute types this module takes as input and emits as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Static module metadata; version() adds the 'config' key to it.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.ods to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# This module exposes no configuration options.
moduleconfig = []
def handler(q=False):
    """Extract the cell contents of a base64-encoded .ods attachment as text.

    Args:
        q: JSON request string with the keys 'attachment' (file name) and
            'data' (base64-encoded file content), or False.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list
        holding one 'freetext' and one 'text' entry with the rendered
        sheets, or a dict with a single 'error' message on failure.

    Raises:
        KeyError: if the request has no 'attachment' key.
    """
    if q is False:
        return False
    request = json.loads(q)
    filename = request['attachment']
    try:
        # BytesIO only needs bytes, so decode directly instead of wrapping
        # the payload in an intermediate array first.
        payload = binascii.a2b_base64(request['data'])
    except Exception as e:
        print(e)
        payload = b''
    if not payload:
        # Covers a missing key, undecodable input and a genuinely empty
        # payload, which is what the message below describes.
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # A fresh dict keeps one request's error text out of shared state.
        return {'error': err}
    ods_file = io.BytesIO(payload)
    try:
        # Opening the document is the first step that can fail on a
        # malformed file, so it belongs inside the guarded section.
        doc = ezodf.opendoc(ods_file)
        num_sheets = len(doc.sheets)
        rendered = []
        # NOTE(review): some pandas_ods_reader releases document integer
        # sheet indices as 1-based; confirm that starting at 0 addresses
        # the intended sheets for the pinned version.
        for i in range(num_sheets):
            ods = pandas_ods_reader.read_ods(ods_file, i, headers=False)
            rendered.append(ods.to_string(max_rows=None))
        # Every sheet is preceded by a newline, including the first.
        ods_content = "".join("\n" + sheet for sheet in rendered)
        print(ods_content)
        comment = ".ods-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': ods_content, 'comment': comment},
                            {'types': ['text'], 'values': ods_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .ods. Error was: " + str(e)}
def introspection():
    """Return the input and output attribute types declared by this module."""
    return mispattributes
def version():
    """Return the module metadata with its configuration options attached.

    The option list is stored under the 'config' key of moduleinfo, which
    is updated in place and then returned.
    """
    moduleinfo.update(config=moduleconfig)
    return moduleinfo

View File

@ -0,0 +1,51 @@
import json
import binascii
import np
from ODTReader.odtreader import odtToText
import io
# Default error payload: a dict carrying a single 'error' message.
misperrors = {'error': 'Error'}

# Attribute types this module takes as input and emits as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Static module metadata; version() adds the 'config' key to it.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.odt to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# This module exposes no configuration options.
moduleconfig = []
def handler(q=False):
    """Extract the text of a base64-encoded .odt attachment.

    Args:
        q: JSON request string with the keys 'attachment' (file name) and
            'data' (base64-encoded file content), or False.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list
        holding one 'freetext' and one 'text' entry with the extracted
        text, or a dict with a single 'error' message on failure.

    Raises:
        KeyError: if the request has no 'attachment' key.
    """
    if q is False:
        return False
    request = json.loads(q)
    filename = request['attachment']
    try:
        # BytesIO only needs bytes, so decode directly instead of wrapping
        # the payload in an intermediate array first.
        payload = binascii.a2b_base64(request['data'])
    except Exception as e:
        print(e)
        payload = b''
    if not payload:
        # Covers a missing key, undecodable input and a genuinely empty
        # payload, which is what the message below describes.
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # A fresh dict keeps one request's error text out of shared state.
        return {'error': err}
    odt_file = io.BytesIO(payload)
    try:
        odt_content = odtToText(odt_file)
        print(odt_content)
        comment = ".odt-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': odt_content, 'comment': comment},
                            {'types': ['text'], 'values': odt_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .odt. Error was: " + str(e)}
def introspection():
    """Return the input and output attribute types declared by this module."""
    return mispattributes
def version():
    """Return the module metadata with its configuration options attached.

    The option list is stored under the 'config' key of moduleinfo, which
    is updated in place and then returned.
    """
    moduleinfo.update(config=moduleconfig)
    return moduleinfo

View File

@ -0,0 +1,50 @@
import json
import binascii
import np
import pytesseract
import pdftotext
import io
import collections
# Default error payload: a dict carrying a single 'error' message.
misperrors = {'error': 'Error'}

# Attribute types this module takes as input and emits as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Static module metadata; version() adds the 'config' key to it.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': 'PDF to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# This module exposes no configuration options.
moduleconfig = []
def handler(q=False):
    """Extract the text of a base64-encoded PDF attachment.

    Args:
        q: JSON request string with the keys 'attachment' (file name) and
            'data' (base64-encoded file content), or False.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list
        holding one 'freetext' entry with the page texts joined by blank
        lines, or a dict with a single 'error' message on failure.

    Raises:
        KeyError: if the request has no 'attachment' key.
    """
    if q is False:
        return False
    request = json.loads(q)
    filename = request['attachment']
    try:
        # BytesIO only needs bytes, so decode directly instead of wrapping
        # the payload in an intermediate array first.
        payload = binascii.a2b_base64(request['data'])
    except Exception as e:
        print(e)
        payload = b''
    if not payload:
        # Covers a missing key, undecodable input and a genuinely empty
        # payload, which is what the message below describes.
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # A fresh dict keeps one request's error text out of shared state.
        return {'error': err}
    pdf_file = io.BytesIO(payload)
    try:
        pdf_content = "\n\n".join(pdftotext.PDF(pdf_file))
        # NOTE(review): only a 'freetext' result is produced here although
        # the module also lists 'text' as an output type - confirm intended.
        return {'results': [{'types': ['freetext'], 'values': pdf_content,
                             'comment': "PDF-to-text from file " + filename}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as PDF. Error was: " + str(e)}
def introspection():
    """Return the input and output attribute types declared by this module."""
    return mispattributes
def version():
    """Return the module metadata with its configuration options attached.

    The option list is stored under the 'config' key of moduleinfo, which
    is updated in place and then returned.
    """
    moduleinfo.update(config=moduleconfig)
    return moduleinfo

View File

@ -0,0 +1,55 @@
import json
import binascii
import np
from pptx import Presentation
import io
# Default error payload: a dict carrying a single 'error' message.
misperrors = {'error': 'Error'}

# Attribute types this module takes as input and emits as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Static module metadata; version() adds the 'config' key to it.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.pptx to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# This module exposes no configuration options.
moduleconfig = []
def handler(q=False):
    """Extract the shape text of a base64-encoded .pptx attachment.

    Args:
        q: JSON request string with the keys 'attachment' (file name) and
            'data' (base64-encoded file content), or False.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list
        holding one 'freetext' and one 'text' entry with the extracted
        text, or a dict with a single 'error' message on failure.

    Raises:
        KeyError: if the request has no 'attachment' key.
    """
    if q is False:
        return False
    request = json.loads(q)
    filename = request['attachment']
    try:
        # BytesIO only needs bytes, so decode directly instead of wrapping
        # the payload in an intermediate array first.
        payload = binascii.a2b_base64(request['data'])
    except Exception as e:
        print(e)
        payload = b''
    if not payload:
        # Covers a missing key, undecodable input and a genuinely empty
        # payload, which is what the message below describes.
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # A fresh dict keeps one request's error text out of shared state.
        return {'error': err}
    ppt_file = io.BytesIO(payload)
    try:
        ppt = Presentation(ppt_file)
        parts = []
        for slide in ppt.slides:
            for shape in slide.shapes:
                # Only shapes exposing a 'text' attribute contribute.
                if hasattr(shape, "text"):
                    print(shape.text)
                    parts.append(shape.text)
        # Every shape text is preceded by a newline, including the first.
        ppt_content = "".join("\n" + text for text in parts)
        comment = ".pptx-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': ppt_content, 'comment': comment},
                            {'types': ['text'], 'values': ppt_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .pptx. Error was: " + str(e)}
def introspection():
    """Return the input and output attribute types declared by this module."""
    return mispattributes
def version():
    """Return the module metadata with its configuration options attached.

    The option list is stored under the 'config' key of moduleinfo, which
    is updated in place and then returned.
    """
    moduleinfo.update(config=moduleconfig)
    return moduleinfo

View File

@ -0,0 +1,53 @@
import json
import binascii
import np
import pandas
import io
# Default error payload: a dict carrying a single 'error' message.
misperrors = {'error': 'Error'}

# Attribute types this module takes as input and emits as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Static module metadata; version() adds the 'config' key to it.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.xlsx to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# This module exposes no configuration options.
moduleconfig = []
def handler(q=False):
    """Extract the cell contents of a base64-encoded .xlsx attachment as text.

    Args:
        q: JSON request string with the keys 'attachment' (file name) and
            'data' (base64-encoded file content), or False.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list
        holding one 'freetext' and one 'text' entry with the rendered
        sheet, or a dict with a single 'error' message on failure.

    Raises:
        KeyError: if the request has no 'attachment' key.
    """
    if q is False:
        return False
    request = json.loads(q)
    filename = request['attachment']
    try:
        # BytesIO only needs bytes, so decode directly instead of wrapping
        # the payload in an intermediate array first.
        payload = binascii.a2b_base64(request['data'])
    except Exception as e:
        print(e)
        payload = b''
    if not payload:
        # Covers a missing key, undecodable input and a genuinely empty
        # payload, which is what the message below describes.
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # A fresh dict keeps one request's error text out of shared state.
        return {'error': err}
    xls_file = io.BytesIO(payload)
    try:
        # Cell contents must not be truncated in the rendered text. Current
        # pandas spells "no limit" as None and rejects negative widths;
        # releases that reject None fall back to the legacy -1.
        try:
            pandas.set_option('display.max_colwidth', None)
        except ValueError:
            pandas.set_option('display.max_colwidth', -1)
        xls = pandas.read_excel(xls_file)
        xls_content = xls.to_string(max_rows=None)
        print(xls_content)
        comment = ".xlsx-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': xls_content, 'comment': comment},
                            {'types': ['text'], 'values': xls_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .xlsx. Error was: " + str(e)}
def introspection():
    """Return the input and output attribute types declared by this module."""
    return mispattributes
def version():
    """Return the module metadata with its configuration options attached.

    The option list is stored under the 'config' key of moduleinfo, which
    is updated in place and then returned.
    """
    moduleinfo.update(config=moduleconfig)
    return moduleinfo