mirror of https://github.com/CIRCL/AIL-framework
fix: [module Webstats + BankAccount-Decoder] fix faup return type + remove old Paste library from BankAccount-Decoder #465
parent
d8fbd72863
commit
f9856a1589
|
@ -5,7 +5,7 @@
|
||||||
The BankAccount Module
|
The BankAccount Module
|
||||||
======================
|
======================
|
||||||
|
|
||||||
It applies IBAN regexes to paste content and warns if above a threshold.
|
It applies IBAN regexes to item content and warns if above a threshold.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ import re
|
||||||
import string
|
import string
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
|
||||||
from packages import Paste
|
from packages import Item
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
@ -49,7 +49,7 @@ def is_valid_iban(iban):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def check_all_iban(l_iban, paste, filename):
|
def check_all_iban(l_iban, obj_id):
|
||||||
nb_valid_iban = 0
|
nb_valid_iban = 0
|
||||||
for iban in l_iban:
|
for iban in l_iban:
|
||||||
iban = iban[0]+iban[1]+iban[2]
|
iban = iban[0]+iban[1]+iban[2]
|
||||||
|
@ -65,14 +65,14 @@ def check_all_iban(l_iban, paste, filename):
|
||||||
server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1)
|
server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1)
|
||||||
|
|
||||||
if(nb_valid_iban > 0):
|
if(nb_valid_iban > 0):
|
||||||
to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
|
to_print = 'Iban;{};{};{};'.format(Item.get_source(obj_id), Item.get_item_date(obj_id), Item.get_basename(obj_id))
|
||||||
publisher.warning('{}Checked found {} IBAN;{}'.format(
|
publisher.warning('{}Checked found {} IBAN;{}'.format(
|
||||||
to_print, nb_valid_iban, paste.p_rel_path))
|
to_print, nb_valid_iban, obj_id))
|
||||||
msg = 'infoleak:automatic-detection="iban";{}'.format(filename)
|
msg = 'infoleak:automatic-detection="iban";{}'.format(obj_id)
|
||||||
p.populate_set_out(msg, 'Tags')
|
p.populate_set_out(msg, 'Tags')
|
||||||
|
|
||||||
#Send to duplicate
|
#Send to duplicate
|
||||||
p.populate_set_out(filename, 'Duplicate')
|
p.populate_set_out(obj_id, 'Duplicate')
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
|
@ -103,21 +103,21 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
|
|
||||||
filename = message
|
obj_id = Item.get_item_id(message)
|
||||||
paste = Paste.Paste(filename)
|
|
||||||
content = paste.get_p_content()
|
content = Item.get_item_content(obj_id)
|
||||||
|
|
||||||
signal.alarm(max_execution_time)
|
signal.alarm(max_execution_time)
|
||||||
try:
|
try:
|
||||||
l_iban = iban_regex.findall(content)
|
l_iban = iban_regex.findall(content)
|
||||||
except TimeoutException:
|
except TimeoutException:
|
||||||
print ("{0} processing timeout".format(paste.p_rel_path))
|
print ("{0} processing timeout".format(obj_id))
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
signal.alarm(0)
|
signal.alarm(0)
|
||||||
|
|
||||||
if(len(l_iban) > 0):
|
if(len(l_iban) > 0):
|
||||||
check_all_iban(l_iban, paste, filename)
|
check_all_iban(l_iban, obj_id)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
publisher.debug("Script BankAccount is Idling 10s")
|
publisher.debug("Script BankAccount is Idling 10s")
|
||||||
|
|
|
@ -17,7 +17,6 @@ import datetime
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
from packages import Paste
|
|
||||||
from packages import Item
|
from packages import Item
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
@ -50,11 +49,11 @@ def decode_string(content, message, date, encoded_list, decoder_name, encoded_mi
|
||||||
|
|
||||||
save_hash(decoder_name, message, date, decode)
|
save_hash(decoder_name, message, date, decode)
|
||||||
|
|
||||||
#remove encoded from paste content
|
#remove encoded from item content
|
||||||
content = content.replace(encoded, '', 1)
|
content = content.replace(encoded, '', 1)
|
||||||
|
|
||||||
if(find):
|
if(find):
|
||||||
set_out_paste(decoder_name, message)
|
set_out_item(decoder_name, message)
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
@ -72,8 +71,8 @@ def save_hash(decoder_name, message, date, decoded):
|
||||||
data['estimated type'] = type
|
data['estimated type'] = type
|
||||||
json_data = json.dumps(data)
|
json_data = json.dumps(data)
|
||||||
|
|
||||||
date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])
|
date_item = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])
|
||||||
date_key = date[0:4] + date[4:6] + date[6:8]
|
date_key = date
|
||||||
|
|
||||||
serv_metadata.incrby(decoder_name+'_decoded:'+date_key, 1)
|
serv_metadata.incrby(decoder_name+'_decoded:'+date_key, 1)
|
||||||
serv_metadata.zincrby('hash_date:'+date_key, hash, 1)
|
serv_metadata.zincrby('hash_date:'+date_key, hash, 1)
|
||||||
|
@ -81,24 +80,24 @@ def save_hash(decoder_name, message, date, decoded):
|
||||||
|
|
||||||
# first time we see this hash
|
# first time we see this hash
|
||||||
if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'):
|
if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'):
|
||||||
serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste)
|
serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_item)
|
||||||
serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
|
serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_item)
|
||||||
else:
|
else:
|
||||||
serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
|
serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_item)
|
||||||
|
|
||||||
# first time we see this hash (all encoding) on this paste
|
# first time we see this hash (all encoding) on this item
|
||||||
if serv_metadata.zscore('nb_seen_hash:'+hash, message) is None:
|
if serv_metadata.zscore('nb_seen_hash:'+hash, message) is None:
|
||||||
serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1)
|
serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1)
|
||||||
serv_metadata.sadd('hash_paste:'+message, hash) # paste - hash map
|
serv_metadata.sadd('hash_paste:'+message, hash) # item - hash map
|
||||||
# create hash metadata
|
# create hash metadata
|
||||||
serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type)
|
serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type)
|
||||||
serv_metadata.sadd('hash_all_type', type)
|
serv_metadata.sadd('hash_all_type', type)
|
||||||
|
|
||||||
# first time we see this hash encoding on this paste
|
# first time we see this hash encoding on this item
|
||||||
if serv_metadata.zscore(decoder_name+'_hash:'+hash, message) is None:
|
if serv_metadata.zscore(decoder_name+'_hash:'+hash, message) is None:
|
||||||
print('first '+decoder_name)
|
print('first '+decoder_name)
|
||||||
|
|
||||||
serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # paste - hash map
|
serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # item - hash map
|
||||||
|
|
||||||
# create hash metadata
|
# create hash metadata
|
||||||
serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type)
|
serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type)
|
||||||
|
@ -118,8 +117,8 @@ def save_hash(decoder_name, message, date, decoded):
|
||||||
|
|
||||||
serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)
|
serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)
|
||||||
|
|
||||||
serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
|
serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - item map
|
||||||
serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
|
serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this item
|
||||||
|
|
||||||
# Domain Object
|
# Domain Object
|
||||||
if Item.is_crawled(message):
|
if Item.is_crawled(message):
|
||||||
|
@ -150,7 +149,7 @@ def save_hash_on_disk(decode, type, hash, json_data):
|
||||||
with open(filename_json, 'w') as f:
|
with open(filename_json, 'w') as f:
|
||||||
f.write(json_data)
|
f.write(json_data)
|
||||||
|
|
||||||
def set_out_paste(decoder_name, message):
|
def set_out_item(decoder_name, message):
|
||||||
publisher.warning(decoder_name+' decoded')
|
publisher.warning(decoder_name+' decoded')
|
||||||
#Send to duplicate
|
#Send to duplicate
|
||||||
p.populate_set_out(message, 'Duplicate')
|
p.populate_set_out(message, 'Duplicate')
|
||||||
|
@ -217,12 +216,11 @@ if __name__ == '__main__':
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
filename = message
|
obj_id = Item.get_item_id(message)
|
||||||
paste = Paste.Paste(filename)
|
|
||||||
|
|
||||||
# Do something with the message from the queue
|
# Do something with the message from the queue
|
||||||
content = paste.get_p_content()
|
content = Item.get_item_content(obj_id)
|
||||||
date = str(paste._get_p_date())
|
date = Item.get_item_date(obj_id)
|
||||||
|
|
||||||
for decoder in decoder_order: # add threshold and size limit
|
for decoder in decoder_order: # add threshold and size limit
|
||||||
|
|
||||||
|
@ -233,7 +231,7 @@ if __name__ == '__main__':
|
||||||
except TimeoutException:
|
except TimeoutException:
|
||||||
encoded_list = []
|
encoded_list = []
|
||||||
p.incr_module_timeout_statistic() # add encoder type
|
p.incr_module_timeout_statistic() # add encoder type
|
||||||
print ("{0} processing timeout".format(paste.p_rel_path))
|
print ("{0} processing timeout".format(obj_id))
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
signal.alarm(0)
|
signal.alarm(0)
|
||||||
|
|
|
@ -29,7 +29,10 @@ num_day_to_look = 5 # the detection of the progression start num_day_to_lo
|
||||||
def analyse(server, field_name, date, url_parsed):
|
def analyse(server, field_name, date, url_parsed):
|
||||||
field = url_parsed[field_name]
|
field = url_parsed[field_name]
|
||||||
if field is not None:
|
if field is not None:
|
||||||
field = field.decode('utf8')
|
try: # faup version
|
||||||
|
field = field.decode()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
server.hincrby(field, date, 1)
|
server.hincrby(field, date, 1)
|
||||||
if field_name == "domain": #save domain in a set for the monthly plot
|
if field_name == "domain": #save domain in a set for the monthly plot
|
||||||
domain_set_name = "domain_set_" + date[0:6]
|
domain_set_name = "domain_set_" + date[0:6]
|
||||||
|
|
|
@ -32,6 +32,9 @@ def exist_item(item_id):
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def get_basename(item_id):
|
||||||
|
return os.path.basename(item_id)
|
||||||
|
|
||||||
def get_item_id(full_path):
|
def get_item_id(full_path):
|
||||||
return full_path.replace(PASTES_FOLDER, '', 1)
|
return full_path.replace(PASTES_FOLDER, '', 1)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue