mirror of https://github.com/CIRCL/AIL-framework
185 lines
6.1 KiB
Python
Executable File
185 lines
6.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*-coding:UTF-8 -*
|
|
"""
|
|
Binary module
|
|
|
|
Detect binary and decode it
|
|
"""
|
|
import time
|
|
import os
|
|
import datetime
|
|
import redis
|
|
|
|
from pubsublogger import publisher
|
|
|
|
from Helper import Process
|
|
from packages import Paste
|
|
|
|
import re
|
|
from hashlib import sha1
|
|
import magic
|
|
import json
|
|
|
|
import signal
|
|
|
|
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when processing a paste takes too long."""
|
|
|
|
def timeout_handler(signum, frame):
    """Signal handler: abort the current work by raising TimeoutException.

    Both arguments are supplied by the ``signal`` module and are not used.
    """
    raise TimeoutException()
|
|
|
|
# Install timeout_handler for SIGALRM so that an expiring signal.alarm()
# interrupts the running code by raising TimeoutException.
signal.signal(signal.SIGALRM, timeout_handler)
|
|
|
|
def decode_binary_string(binary_string):
    """Decode a string of '0'/'1' characters into text, 8 bits per character.

    Each complete group of 8 bits is read as an integer code point and turned
    into a character with ``chr``. Trailing bits that do not fill a whole
    group of 8 are ignored.

    :param binary_string: string made only of '0' and '1' characters
    :return: the decoded text (empty string for input shorter than 8 bits)
    :raises ValueError: if a group contains a character other than '0' or '1'
    """
    # The previous body referenced an undefined name ``s`` and therefore
    # raised NameError on every call; it must read the parameter instead.
    usable_length = len(binary_string) - len(binary_string) % 8
    return ''.join(
        chr(int(binary_string[start:start + 8], 2))
        for start in range(0, usable_length, 8)
    )
|
|
|
|
def decode_binary(binary_string):
    """Convert a string of '0'/'1' characters into the bytes it encodes.

    The input is cut into consecutive 8-character groups and each group is
    parsed as a base-2 integer. A final group shorter than 8 characters is
    parsed as-is and still yields one byte.

    :param binary_string: string made only of '0' and '1' characters
    :return: ``bytes`` object with one byte per group
    """
    octets = (
        binary_string[start:start + 8]
        for start in range(0, len(binary_string), 8)
    )
    return bytes(int(octet, 2) for octet in octets)
|
|
|
|
|
|
def search_binary(content, message, date):
    """Look for binary-encoded ('0'/'1') strings in a paste and record them.

    Every match of the module-level ``regex_binary`` that is at least 40
    characters long is decoded with ``decode_binary``, its MIME type is
    guessed with ``magic`` and its SHA-1 is computed. Counters and metadata
    are then updated in ``serv_metadata``; the first time a given hash is
    seen for this paste the decoded payload is also written to disk through
    ``save_binary_as_file``. If at least one string was decoded, the paste is
    forwarded to the 'Duplicate', 'alertHandler' and 'Tags' queues.

    Relies on the module-level names ``regex_binary``, ``serv_metadata``,
    ``p`` and ``publisher``.

    :param content: text of the paste to scan
    :param message: queue message identifying the paste; used as the member
        name in the redis keys and in the outgoing queue messages
    :param date: paste date as a string; characters 0-3, 4-5 and 6-7 are used
        as year, month and day
    """
    found = False

    for candidate in re.findall(regex_binary, content):
        if len(candidate) < 40:
            continue

        decoded = decode_binary(candidate)
        print(message)

        mime_type = magic.from_buffer(decoded, mime=True)
        print(mime_type)

        found = True
        digest = sha1(decoded).hexdigest()
        print(digest)

        json_data = json.dumps({
            'name': digest,
            'date': datetime.datetime.now().strftime("%d/%m/%y"),
            'origin': message,
            'estimated type': mime_type,
        })

        year, month, day = date[0:4], date[4:6], date[6:8]
        date_paste = '{}/{}/{}'.format(year, month, day)
        date_key = year + month + day

        metadata_key = 'metadata_hash:' + digest
        per_paste_key = 'binary_hash:' + digest

        # NOTE(review): zincrby is called as (name, member, amount) throughout
        # this function -- confirm this matches the installed redis client.
        serv_metadata.zincrby('binary_date:' + date_key, digest, 1)

        # 'estimated_type' is only written once the hash has been stored, so
        # its absence means this hash has never been seen before.
        if not serv_metadata.hexists(metadata_key, 'estimated_type'):
            serv_metadata.hset(metadata_key, 'first_seen', date_paste)
        serv_metadata.hset(metadata_key, 'last_seen', date_paste)

        if serv_metadata.zscore(per_paste_key, message) is not None:
            # duplicate: only bump the number of occurrences on this paste
            serv_metadata.zincrby(per_paste_key, message, 1)
            continue

        # first time we see this file encoding on this paste
        print('first binary')
        serv_metadata.hincrby(metadata_key, 'nb_seen_in_all_pastes', 1)

        serv_metadata.sadd('binary_paste:' + message, digest)  # paste - hash map
        serv_metadata.zincrby(per_paste_key, message, 1)  # hash - paste map

        # create hash metadata
        serv_metadata.hset(metadata_key, 'estimated_type', mime_type)
        serv_metadata.sadd('hash_all_type', mime_type)
        serv_metadata.sadd('hash_binary_all_type', mime_type)
        serv_metadata.zincrby('binary_type:' + mime_type, date_key, 1)

        # NOTE(review): ``id`` here is the Python builtin (no local of that
        # name exists); save_binary_as_file does not use this argument.
        save_binary_as_file(decoded, mime_type, digest, json_data, id)
        print('found {} '.format(mime_type))

    if found:
        publisher.warning('binary decoded')
        # Send to duplicate
        p.populate_set_out(message, 'Duplicate')
        # Send to Browse_warning_paste
        p.populate_set_out('binary;{}'.format(message), 'alertHandler')
        p.populate_set_out(
            'infoleak:automatic-detection="binary";{}'.format(message), 'Tags')
|
|
|
|
def save_binary_as_file(decode, type, hash, json_data, id):
    """Write a decoded payload and its JSON description to disk.

    The payload is stored under
    ``$AIL_HOME/<base64 dir>/<type>/<first two hash chars>/<hash>`` and the
    JSON description next to it with a ``.json`` suffix, where
    ``<base64 dir>`` is the "base64" entry of the "Directories" config
    section. The relative path and the on-disk size are then recorded in the
    ``metadata_hash:<hash>`` redis hash.

    Relies on the module-level names ``p`` and ``serv_metadata``.

    :param decode: decoded payload (bytes) to write
    :param type: estimated MIME type, used as a directory component
    :param hash: hex digest of the payload, used as the file name
    :param json_data: JSON string written to the ``.json`` side file
    :param id: unused; kept for interface compatibility
    """
    base_dir = p.config.get("Directories", "base64")
    ail_home = os.environ['AIL_HOME']

    local_filename_b64 = os.path.join(base_dir, type, hash[:2], hash)
    filename_b64 = os.path.join(ail_home, local_filename_b64)
    filename_json = os.path.join(ail_home, base_dir, type, hash[:2], hash + '.json')

    # exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test when the directory appears in between.
    os.makedirs(os.path.dirname(filename_b64), exist_ok=True)

    with open(filename_b64, 'wb') as f:
        f.write(decode)

    # create hash metadata
    serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_b64)
    serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_b64))

    with open(filename_json, 'w') as f:
        f.write(json_data)
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a
    # subscriber accordingly (see launch_logs.sh).
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Binary'

    # Setup the I/O queues
    p = Process(config_section)
    max_execution_time = p.config.getint("Binary", "max_execution_time")

    serv_metadata = redis.StrictRedis(
        host=p.config.get("ARDB_Metadata", "host"),
        port=p.config.getint("ARDB_Metadata", "port"),
        db=p.config.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    # Send a description of the module to the logging
    publisher.info("Binary started")

    # Runs of at least 40 '0'/'1' characters. The compiled pattern is kept
    # (previously the result of re.compile() was discarded); re.findall()
    # accepts a compiled pattern as well as a pattern string.
    regex_binary = re.compile(r'[0-1]{40,}')

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        filename = message
        paste = Paste.Paste(filename)

        # Arm the SIGALRM-based timeout for the processing of this paste.
        signal.alarm(max_execution_time)
        try:
            content = paste.get_p_content()
            date = str(paste._get_p_date())
            search_binary(content, message, date)
        except TimeoutException:
            p.incr_module_timeout_statistic()
            print("{0} processing timeout".format(paste.p_path))
            continue
        else:
            # Finished in time: cancel the pending alarm.
            signal.alarm(0)
|