2019-04-26 12:07:55 +02:00
import json
import binascii
import np
import ezodf
import pandas_ods_reader
import io
2021-12-24 16:10:29 +01:00
import logging
2019-04-26 12:07:55 +02:00
# Error payload returned by handler() on failure; handler() overwrites the
# message before returning it.
misperrors = {' error ': ' Error '}

# Attribute types reported by introspection().
# NOTE(review): the keys and values carry surrounding spaces exactly as they
# appear in this file -- confirm they match what the calling framework expects.
mispattributes = {
    ' input ': [' attachment '],
    ' output ': [' freetext ', ' text '],
}
2024-08-12 11:23:10 +02:00
# Static metadata describing this module; version() returns it after attaching
# the module configuration.
moduleinfo = {
    # Identification
    ' version ': ' 0.1 ',
    ' author ': ' Sascha Rommelfangen ',
    ' description ': ' Module to extract freetext from a .ods document. ',
    ' module-type ': [' expansion '],
    ' name ': ' ODS Enrich ',
    ' logo ': ' ods.png ',
    # Third-party packages the handler relies on
    ' requirements ': [
        ' ezodf: Python package to create/manipulate OpenDocumentFormat files. ',
        ' pandas_ods_reader: Python library to read in ODS files. ',
    ],
    # Human-readable documentation
    ' features ': ' The module reads the text contained in a .ods document. The result is passed to the freetext import parser so IoCs can be extracted out of it. ',
    ' references ': [],
    ' input ': ' Attachment attribute containing a .ods document. ',
    ' output ': ' Text and freetext parsed from the document. ',
}
2019-04-26 12:07:55 +02:00
# This module exposes no configuration options; version() publishes this list
# under the module info's config entry.
moduleconfig = []
def handler(q=False):
    """Extract the text of an .ods attachment and return it as module results.

    Args:
        q: JSON-encoded request string, or ``False`` when no request is given.
            The decoded object is read for an attachment file name and for
            base64-encoded document data.

    Returns:
        ``False`` when ``q`` is ``False``. On success, a dict whose results
        list holds the extracted text twice: once typed as freetext and once
        typed as text. On any failure, the shared ``misperrors`` dict with its
        error message updated.

    Raises:
        Errors raised by ``json.loads`` for a malformed ``q`` are not caught
        here and propagate to the caller.
    """
    if q is False:
        return False
    request = json.loads(q)

    # NOTE(review): key and message literals are kept exactly as they appear
    # in this file, including the padding spaces -- confirm against the caller.
    try:
        filename = request[' attachment ']
        ods_bytes = binascii.a2b_base64(request[' data '])
    except Exception:
        err = " Couldn ' t fetch attachment (JSON ' data ' is empty). Are you using the ' Query enrichment ' action? "
        # Log (with traceback) instead of printing, matching the other error path.
        logging.exception(err)
        misperrors[' error '] = err
        return misperrors

    try:
        # Opening the document happens inside the try so that a malformed
        # file yields the error dict instead of an unhandled exception.
        ods_file = io.BytesIO(ods_bytes)
        doc = ezodf.opendoc(ods_file)

        chunks = [" "]
        for i in range(len(doc.sheets)):
            rows = pandas_ods_reader.parsers.ods.get_rows(doc, i)
            try:
                ods = pandas_ods_reader.algo.parse_data(
                    pandas_ods_reader.parsers.ods,
                    rows,
                    headers=False,
                    columns=[],
                    skiprows=0,
                )
                ods = pandas_ods_reader.utils.sanitize_df(ods)
            except TypeError:
                # Fallback call shape; presumably needed for a different
                # pandas_ods_reader version -- TODO confirm.
                ods = pandas_ods_reader.algo.read_data(
                    pandas_ods_reader.parsers.ods, ods_file, i, headers=False
                )
            chunks.append(" \n " + ods.to_string(max_rows=None))

        ods_content = "".join(chunks)
        comment = " .ods-to-text from file " + filename
        return {
            ' results ': [
                {' types ': [' freetext '], ' values ': ods_content, ' comment ': comment},
                {' types ': [' text '], ' values ': ods_content, ' comment ': comment},
            ]
        }
    except Exception as e:
        logging.exception(e)
        err = " Couldn ' t analyze file as .ods. Error was: " + str(e)
        misperrors[' error '] = err
        return misperrors
def introspection():
    """Return the module-level ``mispattributes`` mapping unchanged."""
    return mispattributes
def version():
    """Attach the module configuration to ``moduleinfo`` and return it.

    The same module-level dict is updated in place and returned on every call.
    """
    moduleinfo.update({' config ': moduleconfig})
    return moduleinfo