Merge branch 'master' into advanced_crawler

pull/342/head
Terrtia 2019-03-26 16:03:42 +01:00
commit 59664efe45
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
5 changed files with 109 additions and 14 deletions

README.md

@@ -12,7 +12,7 @@ AIL is a modular framework to analyse potential information leaks from unstructu
 <table>
   <tr>
     <td>Latest Release</td>
-    <td><a href="https://badge.fury.io/gh/CIRCL%2FAIL-Framework"><img src="https://badge.fury.io/gh/CIRCL%2FAIL-Framework.svg" alt="GitHub version" height="18"></a></td>
+    <td><a href="https://github.com/CIRCL/AIL-framework/releases/latest"><img src="https://img.shields.io/github/release/CIRCL/AIL-framework/all.svg"></a></td>
   </tr>
   <tr>
     <td>Contributors</td>

bin/CVE_check.py (new executable file, +83)

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

from packages import Paste
from Helper import Process

import os
import re
import time
import redis
import configparser

from collections import defaultdict

def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
    dict_keyword = {}

    for paste_cve in list_paste_cve:
        paste_content = Paste.Paste(paste_cve).get_p_content()

        cve_list = reg_cve.findall(paste_content)
        if only_one_same_cve_by_paste:
            # count each CVE identifier at most once per paste
            cve_list = set(cve_list)

        for cve in cve_list:
            try:
                dict_keyword[cve] += 1
            except KeyError:
                dict_keyword[cve] = 1

    print('------------------------------------------------')

    if dict_keyword:
        # sort CVE identifiers by number of occurrences, most frequent first
        res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
        for item in res:
            print(item)


if __name__ == '__main__':

    # CONFIG #
    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. \
                        Did you set environment variables? \
                        Or activate the virtualenv.')

    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    serv_metadata = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=cfg.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    serv_tags = redis.StrictRedis(
        host=cfg.get("ARDB_Tags", "host"),
        port=cfg.getint("ARDB_Tags", "port"),
        db=cfg.getint("ARDB_Tags", "db"),
        decode_responses=True)

    reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}')

    #all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"')
    #all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
    #all_past_cve_crawler = serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
    #print('{} + {} = {}'.format(len(all_past_cve_regular), len(all_past_cve_crawler), len(all_past_cve)))

    print('ALL_CVE')
    get_dict_cve(serv_tags.smembers('infoleak:automatic-detection="cve"'), True)
    print()
    print()
    print()
    print('REGULAR_CVE')
    get_dict_cve(serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
    print()
    print()
    print()
    print('CRAWLER_CVE')
    get_dict_cve(serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
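A quick way to sanity-check the counting logic in get_dict_cve is to run the same regular expression over a throwaway string. The sketch below is standalone (no Redis, no Paste objects) and the sample text is invented purely for illustration:

import re
from collections import Counter

# Same pattern as in CVE_check.py
reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}')

# Hypothetical paste content, used only to exercise the regex and the counting.
sample = 'PoC for CVE-2019-0708, also mentions CVE-2017-0144 and CVE-2019-0708 again'

matches = reg_cve.findall(sample)
print(Counter(matches).most_common())       # [('CVE-2019-0708', 2), ('CVE-2017-0144', 1)]
print(Counter(set(matches)).most_common())  # each CVE counted once, like only_one_same_cve_by_paste=True

The script itself aggregates these per-CVE counts across every paste tagged infoleak:automatic-detection="cve", then prints the regular and crawler subsets separately.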

bin/Mixer.py

@@ -23,23 +23,17 @@ Requirements

 import base64
 import os
 import time
+import uuid

 from pubsublogger import publisher
 from Helper import Process

 import magic
-import io
-#import gzip

-'''
-def gunzip_bytes_obj(bytes_obj):
-    in_ = io.BytesIO()
-    in_.write(bytes_obj)
-    in_.seek(0)
-    with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
-        gunzipped_bytes_obj = fo.read()
-    return gunzipped_bytes_obj.decode()'''
+def rreplace(s, old, new, occurrence):
+    li = s.rsplit(old, occurrence)
+    return new.join(li)

 if __name__ == '__main__':
     publisher.port = 6380
@@ -77,6 +71,12 @@ if __name__ == '__main__':
             processed_paste = 0
             time.sleep(1)
             continue
+
+        file_name_paste = paste.split('/')[-1]
+        if len(file_name_paste)>255:
+            new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
+            paste = rreplace(paste, file_name_paste, new_file_name_paste, 1)
+
         # Creating the full filepath
         filename = os.path.join(os.environ['AIL_HOME'],
                                 p.config.get("Directories", "pastes"), paste)

bin/Onion.py

@@ -198,8 +198,12 @@ if __name__ == "__main__":
             print(len(domains_list))

             if len(domains_list) > 0:
-                publisher.warning('{}Detected {} .onion(s);{}'.format(
-                    to_print, len(domains_list),PST.p_path))
+                if not activate_crawler:
+                    publisher.warning('{}Detected {} .onion(s);{}'.format(
+                        to_print, len(domains_list),PST.p_path))
+                else:
+                    publisher.info('{}Detected {} .onion(s);{}'.format(
+                        to_print, len(domains_list),PST.p_path))
                 now = datetime.datetime.now()
                 path = os.path.join('onions', str(now.year).zfill(4),
                                     str(now.month).zfill(2),
@@ -220,6 +224,10 @@ if __name__ == "__main__":
                     else:
                         continue

+                    # too many subdomain
+                    if len(domain.split('.')) > 5:
+                        continue
+
                     if not r_onion.sismember('month_onion_up:{}'.format(date_month), domain) and not r_onion.sismember('onion_down:'+date , domain):
                         if not r_onion.sismember('onion_domain_crawler_queue', domain):
                             print('send to onion crawler')
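The added check in the second hunk drops onion addresses with an unusually deep subdomain chain before they are queued for crawling. A small, self-contained illustration of the same test (the domains below are made up):

# Hypothetical onion domains, only to show which ones the depth check rejects.
domains = [
    'examplev3onionaddressexampleexampleexam.onion',
    'a.b.c.d.e.examplev3onionaddressexampleexampleexam.onion',
]

for domain in domains:
    if len(domain.split('.')) > 5:
        # too many subdomain -> skipped, same threshold as in the diff
        print('skipped:', domain)
        continue
    print('kept for the crawler queue:', domain)

Anything with more than five dot-separated labels is treated as noise rather than sent to the onion crawler.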

bin/torcrawler/TorSplashCrawler.py

@@ -130,7 +130,11 @@ class TorSplashCrawler():

                 print('Connection to proxy refused')
             else:
-                UUID = self.domains[0]+str(uuid.uuid4())
+                #avoid filename too big
+                if len(self.domains[0]) > 215:
+                    UUID = self.domains[0][-215:]+str(uuid.uuid4())
+                else:
+                    UUID = self.domains[0]+str(uuid.uuid4())
                 filename_paste = os.path.join(self.crawled_paste_filemame, UUID)
                 relative_filename_paste = os.path.join(self.crawler_path, UUID)
                 filename_screenshot = os.path.join(self.crawled_screenshot, UUID +'.png')
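The crawler applies the same file-name precaution when it derives the paste and screenshot names from the crawled domain: domains longer than 215 characters are truncated (keeping the tail) before the UUID is appended. A standalone sketch with an invented domain:

import uuid

# Hypothetical, deliberately oversized domain to exercise the truncation branch.
domain = 'sub.' + 'x' * 250 + '.onion'

# avoid filename too big
if len(domain) > 215:
    UUID = domain[-215:] + str(uuid.uuid4())
else:
    UUID = domain + str(uuid.uuid4())

print(len(UUID))  # 215 + 36 = 251, so even UUID + '.png' stays within a 255-character name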