mirror of https://github.com/CIRCL/AIL-framework
Merge branch 'master' into advanced_crawler
commit 59664efe45
@@ -12,7 +12,7 @@ AIL is a modular framework to analyse potential information leaks from unstructured
 <table>
   <tr>
     <td>Latest Release</td>
-    <td><a href="https://badge.fury.io/gh/CIRCL%2FAIL-Framework"><img src="https://badge.fury.io/gh/CIRCL%2FAIL-Framework.svg" alt="GitHub version" height="18"></a></td>
+    <td><a href="https://github.com/CIRCL/AIL-framework/releases/latest"><img src="https://img.shields.io/github/release/CIRCL/AIL-framework/all.svg"></a></td>
   </tr>
   <tr>
     <td>Contributors</td>
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import re
import time
import redis
import configparser

from collections import defaultdict

from packages import Paste
from Helper import Process


def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
    # Count how many times each CVE identifier appears in the given pastes.
    dict_keyword = {}

    for paste_cve in list_paste_cve:
        paste_content = Paste.Paste(paste_cve).get_p_content()

        cve_list = reg_cve.findall(paste_content)
        if only_one_same_cve_by_paste:
            # count each CVE identifier at most once per paste
            cve_list = set(cve_list)

        for cve in cve_list:
            try:
                dict_keyword[cve] += 1
            except KeyError:
                dict_keyword[cve] = 1

    print('------------------------------------------------')
    if dict_keyword:
        # sort by number of occurrences, most frequent first
        res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
        for item in res:
            print(item)


if __name__ == '__main__':

    # CONFIG #
    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. \
                        Did you set environment variables? \
                        Or activate the virtualenv.')

    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    serv_metadata = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=cfg.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    serv_tags = redis.StrictRedis(
        host=cfg.get("ARDB_Tags", "host"),
        port=cfg.getint("ARDB_Tags", "port"),
        db=cfg.getint("ARDB_Tags", "db"),
        decode_responses=True)

    reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}')

    #all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"')
    #all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
    #all_past_cve_crawler = serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')

    #print('{} + {} = {}'.format(len(all_past_cve_regular), len(all_past_cve_crawler), len(all_past_cve)))

    print('ALL_CVE')
    get_dict_cve(serv_tags.smembers('infoleak:automatic-detection="cve"'), True)
    print()
    print()
    print()
    print('REGULAR_CVE')
    get_dict_cve(serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
    print()
    print()
    print()
    print('CRAWLER_CVE')
    get_dict_cve(serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
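For context, the counting above can be exercised without AIL or Redis; this standalone sketch (the sample paste text and the use of collections.Counter are illustrative, not part of the commit) shows how the CVE regex behaves with and without the only_one_same_cve_by_paste flag.

import re
from collections import Counter

# same pattern as in the script above
reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}')

# made-up paste content, for illustration only
sample_paste = 'PoC for CVE-2017-0144, see also CVE-2017-0144 and CVE-2014-0160'

matches = reg_cve.findall(sample_paste)
print(Counter(matches))       # duplicates counted: CVE-2017-0144 -> 2, CVE-2014-0160 -> 1
print(Counter(set(matches)))  # flag behaviour: each CVE counted at most once per paste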
@@ -23,23 +23,17 @@ Requirements
 import base64
 import os
 import time
+import uuid
 from pubsublogger import publisher

 from Helper import Process

 import magic
-import io
-#import gzip
-
-'''
-def gunzip_bytes_obj(bytes_obj):
-    in_ = io.BytesIO()
-    in_.write(bytes_obj)
-    in_.seek(0)
-    with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
-        gunzipped_bytes_obj = fo.read()
-
-    return gunzipped_bytes_obj.decode()'''
+
+def rreplace(s, old, new, occurrence):
+    li = s.rsplit(old, occurrence)
+    return new.join(li)
+

 if __name__ == '__main__':
     publisher.port = 6380
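Not part of the diff, but as a quick illustration of the new helper: rreplace substitutes only the last occurrence(s) of a substring, which is what the filename rewrite in the next hunk relies on. The sample path below is invented.

def rreplace(s, old, new, occurrence):
    li = s.rsplit(old, occurrence)
    return new.join(li)

# replaces only the last occurrence, leaving the directory part untouched
print(rreplace('paste/2018/paste_abc.gz', 'paste', 'renamed', 1))
# -> 'paste/2018/renamed_abc.gz'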
@@ -77,6 +71,12 @@ if __name__ == '__main__':
             processed_paste = 0
             time.sleep(1)
             continue
+
+        file_name_paste = paste.split('/')[-1]
+        if len(file_name_paste) > 255:
+            new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
+            paste = rreplace(paste, file_name_paste, new_file_name_paste, 1)
+
         # Creating the full filepath
         filename = os.path.join(os.environ['AIL_HOME'],
                         p.config.get("Directories", "pastes"), paste)
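To make the length arithmetic concrete, here is a standalone sketch; the helper name and sample path are invented for illustration. A basename longer than 255 characters is cut to 215 characters, then a 36-character UUID and '.gz' are appended, giving 254 characters, which stays below the usual 255-character filename limit.

import uuid

def shorten_long_basename(paste_path):
    # hypothetical wrapper around the same check added in the hunk above
    file_name_paste = paste_path.split('/')[-1]
    if len(file_name_paste) > 255:
        new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
        # stand-in for rreplace(paste, file_name_paste, new_file_name_paste, 1)
        paste_path = paste_path[:-len(file_name_paste)] + new_file_name_paste
    return paste_path

long_paste = 'archive/2018/' + 'a' * 300 + '.gz'
print(len(shorten_long_basename(long_paste).split('/')[-1]))  # 215 + 36 + 3 = 254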
bin/Onion.py
@@ -198,8 +198,12 @@ if __name__ == "__main__":
            print(len(domains_list))
            if len(domains_list) > 0:
-                publisher.warning('{}Detected {} .onion(s);{}'.format(
-                    to_print, len(domains_list),PST.p_path))
+                if not activate_crawler:
+                    publisher.warning('{}Detected {} .onion(s);{}'.format(
+                        to_print, len(domains_list),PST.p_path))
+                else:
+                    publisher.info('{}Detected {} .onion(s);{}'.format(
+                        to_print, len(domains_list),PST.p_path))

                now = datetime.datetime.now()
                path = os.path.join('onions', str(now.year).zfill(4),
                                    str(now.month).zfill(2),
@@ -220,6 +224,10 @@ if __name__ == "__main__":
                    else:
                        continue

+                    # too many subdomains: skip the domain
+                    if len(domain.split('.')) > 5:
+                        continue
+
                    if not r_onion.sismember('month_onion_up:{}'.format(date_month), domain) and not r_onion.sismember('onion_down:'+date , domain):
                        if not r_onion.sismember('onion_domain_crawler_queue', domain):
                            print('send to onion crawler')
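For reference, the cut-off added above just counts dot-separated labels; a hypothetical helper (not in the commit) makes the effect visible.

def too_many_subdomains(domain, max_labels=5):
    # mirrors the check in Onion.py: skip domains with more than five labels
    return len(domain.split('.')) > max_labels

print(too_many_subdomains('example.onion'))            # False (2 labels)
print(too_many_subdomains('a.b.c.d.e.example.onion'))  # True (7 labels)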
@@ -130,7 +130,11 @@ class TorSplashCrawler():
                print('Connection to proxy refused')
        else:

-            UUID = self.domains[0]+str(uuid.uuid4())
+            # avoid an over-long filename
+            if len(self.domains[0]) > 215:
+                UUID = self.domains[0][-215:]+str(uuid.uuid4())
+            else:
+                UUID = self.domains[0]+str(uuid.uuid4())
            filename_paste = os.path.join(self.crawled_paste_filemame, UUID)
            relative_filename_paste = os.path.join(self.crawler_path, UUID)
            filename_screenshot = os.path.join(self.crawled_screenshot, UUID +'.png')