chg: [Domain] add domain object: tag + correlation (decoded items + tags + pgp + cryptocurrency)

pull/418/head
Terrtia 2019-10-17 16:39:43 +02:00
parent e759b560db
commit 48abb89d28
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
10 changed files with 287 additions and 43 deletions

View File

@ -261,6 +261,9 @@ Redis and ARDB overview
| set_pgpdump_name:*name* | *item_path* |
| | |
| set_pgpdump_mail:*mail* | *item_path* |
| | |
| | |
| set_domain_pgpdump_**pgp_type**:**key** | **domain** |
##### Hset date:
| Key | Field | Value |
@ -288,11 +291,20 @@ Redis and ARDB overview
| item_pgpdump_name:*item_path* | *name* |
| | |
| item_pgpdump_mail:*item_path* | *mail* |
| | |
| | |
| domain_pgpdump_**pgp_type**:**domain** | **key** |
#### Cryptocurrency
Supported cryptocurrency:
- bitcoin
- bitcoin-cash
- dash
- etherum
- litecoin
- monero
- zcash
##### Hset:
| Key | Field | Value |
@ -303,7 +315,8 @@ Supported cryptocurrency:
##### set:
| Key | Value |
| ------ | ------ |
| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** |
| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** | PASTE
| domain_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **domain** | DOMAIN
##### Hset date:
| Key | Field | Value |
@ -318,8 +331,14 @@ Supported cryptocurrency:
##### set:
| Key | Value |
| ------ | ------ |
| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** |
| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | PASTE
| domain_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | DOMAIN
#### HASH
| Key | Value |
| ------ | ------ |
| hash_domain:**domain** | **hash** |
| domain_hash:**hash** | **domain** |
## DB9 - Crawler:
@ -362,6 +381,20 @@ Supported cryptocurrency:
}
```
##### CRAWLER QUEUES:
| SET - Key | Value |
| ------ | ------ |
| onion_crawler_queue | **url**;**item_id** | RE-CRAWL
| regular_crawler_queue | - |
| | |
| onion_crawler_priority_queue | **url**;**item_id** | USER
| regular_crawler_priority_queue | - |
| | |
| onion_crawler_discovery_queue | **url**;**item_id** | DISCOVER
| regular_crawler_discovery_queue | - |
##### TO CHANGE:
ARDB overview
----------------------------------------- SENTIMENT ------------------------------------

View File

@ -18,6 +18,7 @@ from pubsublogger import publisher
from Helper import Process
from packages import Paste
from packages import Item
import re
import signal
@ -120,6 +121,12 @@ def save_hash(decoder_name, message, date, decoded):
serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
# Domain Object
if Item.is_crawled(message):
domain = Item.get_item_domain(message)
serv_metadata.sadd('hash_domain:{}'.format(domain), hash) # domain - hash map
serv_metadata.sadd('domain_hash:{}'.format(hash), domain) # hash - domain map
def save_hash_on_disk(decode, type, hash, json_data):

View File

@ -21,6 +21,8 @@ from bs4 import BeautifulSoup
from Helper import Process
from packages import Paste
from packages import Pgp
class TimeoutException(Exception):
pass
@ -117,31 +119,6 @@ def extract_id_from_output(pgp_dump_outpout):
key_id = key_id.replace(key_id_str, '', 1)
set_key.add(key_id)
def save_pgp_data(type_pgp, date, item_path, data):
# create basic medata
if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)):
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date)
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
else:
last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen')
if not last_seen:
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
else:
if int(last_seen) < int(date):
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
# global set
serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)
# daily
serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)
# all type
serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)
# item_metadata
serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)
if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@ -236,12 +213,12 @@ if __name__ == '__main__':
for key_id in set_key:
print(key_id)
save_pgp_data('key', date, message, key_id)
Pgp.save_pgp_data('key', date, message, key_id)
for name_id in set_name:
print(name_id)
save_pgp_data('name', date, message, name_id)
Pgp.save_pgp_data('name', date, message, name_id)
for mail_id in set_mail:
print(mail_id)
save_pgp_data('mail', date, message, mail_id)
Pgp.save_pgp_data('mail', date, message, mail_id)

View File

@ -16,6 +16,8 @@ import datetime
from pubsublogger import publisher
from Helper import Process
from packages import Paste
from packages import Item
def get_item_date(item_filename):
l_directory = item_filename.split('/')
@ -84,6 +86,12 @@ if __name__ == '__main__':
set_tag_metadata(tag, item_date)
server_metadata.sadd('tag:{}'.format(path), tag)
# Domain Object
if Item.is_crawled(path):
domain = Item.get_item_domain(path)
server_metadata.sadd('tag:{}'.format(domain), tag)
server.sadd('domain:{}:{}'.format(tag, item_date), domain)
curr_date = datetime.date.today().strftime("%Y%m%d")
server.hincrby('daily_tags:{}'.format(item_date), tag, 1)
p.populate_set_out(message, 'MISP_The_Hive_feeder')

View File

@ -2,8 +2,10 @@
# -*-coding:UTF-8 -*
import os
import sys
import redis
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
import Flask_config
r_serv_metadata = Flask_config.r_serv_metadata
@ -14,9 +16,11 @@ class Correlation(object):
def __init__(self, correlation_name):
self.correlation_name = correlation_name
def _exist_corelation_field(self, correlation_type, field_name):
return r_serv_metadata.exists('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'):
if type=='paste':
return r_serv_metadata.exists('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
else:
return r_serv_metadata.exists('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
def _get_items(self, correlation_type, field_name):
res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
@ -25,6 +29,12 @@ class Correlation(object):
else:
return []
def _get_domains(self, correlation_type, field_name):
res = r_serv_metadata.smembers('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
if res:
return list(res)
else:
return []
def _get_metadata(self, correlation_type, field_name):
meta_dict = {}
@ -35,14 +45,14 @@ class Correlation(object):
def _get_correlation_by_date(self, correlation_type, date):
return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date))
def verify_correlation_field_request(self, request_dict, correlation_type):
def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'):
if not request_dict:
return Response({'status': 'error', 'reason': 'Malformed JSON'}, 400)
return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)
field_name = request_dict.get(correlation_type, None)
if not field_name:
return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 )
if not self._exist_corelation_field(correlation_type, field_name):
if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type):
return ( {'status': 'error', 'reason': 'Item not found'}, 404 )
def get_correlation(self, request_dict, correlation_type, field_name):
@ -58,7 +68,37 @@ class Correlation(object):
return (dict_resp, 200)
def get_correlation_domain(self, request_dict, correlation_type, field_name):
dict_resp = {}
dict_resp['domain'] = self._get_domains(correlation_type, field_name)
#if request_dict.get('metadata'):
# dict_resp['metadata'] = self._get_metadata(correlation_type, field_name)
#cryptocurrency_all:cryptocurrency name cryptocurrency address nb seen
dict_resp[correlation_type] = field_name
return (dict_resp, 200)
######## INTERNAL ########
def _get_domain_correlation_obj(correlation_name, correlation_type, domain):
print('domain_{}_{}:{}'.format(correlation_name, correlation_type, domain))
res = r_serv_metadata.smembers('domain_{}_{}:{}'.format(correlation_name, correlation_type, domain))
if res:
return list(res)
else:
return []
######## ########
######## API EXPOSED ########
def get_domain_correlation_obj(request_dict, correlation_name, correlation_type, domain):
dict_resp = {}
dict_resp[correlation_type] = _get_domain_correlation_obj(correlation_name, correlation_type, domain)
dict_resp['domain'] = domain
return (dict_resp, 200)
######## ########

View File

@ -10,11 +10,13 @@ from hashlib import sha256
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config
from Correlation import Correlation
import Item
r_serv_metadata = Flask_config.r_serv_metadata
all_cryptocurrency = ['bitcoin', 'etherum']
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
#address_validation = {'bitcoin': 'base58', 'dash': 'base58'}
cryptocurrency = Correlation('cryptocurrency')
@ -52,6 +54,21 @@ def get_cryptocurrency(request_dict, cryptocurrency_type):
return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name)
# # TODO: add get all cryptocurrency option
def get_cryptocurrency_domain(request_dict, cryptocurrency_type):
res = cryptocurrency.verify_correlation_field_request(request_dict, cryptocurrency_type, item_type='domain')
if res:
return res
field_name = request_dict.get(cryptocurrency_type)
if not verify_cryptocurrency_address(cryptocurrency_type, field_name):
return ( {'status': 'error', 'reason': 'Invalid Cryptocurrency address'}, 400 )
return cryptocurrency.get_correlation_domain(request_dict, cryptocurrency_type, field_name)
def get_domain_cryptocurrency(request_dict, cryptocurrency_type):
return cryptocurrency.get_domain_correlation_obj(self, request_dict, cryptocurrency_type, domain)
def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address):
# create basic medata
if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)):
@ -65,7 +82,8 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
if int(last_seen) < int(date):
r_serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date)
# global set
## global set
# item
r_serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path)
# daily
@ -74,5 +92,12 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
# all type
r_serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1)
# item_metadata
## object_metadata
# item
r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address)
# domain
if Item.is_crawled(item_path):
domain = Item.get_item_domain(item_path)
r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address)
r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain)

85
bin/packages/Domain.py Executable file
View File

@ -0,0 +1,85 @@
#!/usr/bin/python3
"""
The ``Domain``
===================
"""
import os
import sys
import time
import redis
import Item
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
import Flask_config
r_serv_onion = Flask_config.r_serv_onion
def get_domain_type(domain):
if str(domain).endswith('.onion'):
return 'onion'
else:
return 'regular'
def get_all_domain_up_by_type(domain_type):
if domain_type in domains:
list_domain = list(r_serv_onion.smembers('full_{}_up'.format(domain_type)))
return ({'type': domain_type, 'domains': list_domain}, 200)
else:
return ({"status": "error", "reason": "Invalid domain type"}, 400)
def get_domain_items(domain, root_item_id):
dom_item = get_domain_item_children(domain, root_item_id)
dom_item.append(root_item_id)
return dom_item
def get_domain_item_children(domain, root_item_id):
all_items = []
for item_id in Item.get_item_children(root_item_id):
if Item.is_item_in_domain(domain, item_id):
all_items.append(item_id)
all_items.extend(get_domain_item_children(domain, item_id))
return all_items
def get_link_tree():
pass
###
### correlation
###
def _get_domain_correlation(domain, correlation_name=None, correlation_type=None):
res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
if res:
return list(res)
else:
return []
def get_item_bitcoin(item_id):
return _get_item_correlation('cryptocurrency', 'bitcoin', item_id)
def get_item_pgp_key(item_id):
return _get_item_correlation('pgpdump', 'key', item_id)
def get_item_pgp_name(item_id):
return _get_item_correlation('pgpdump', 'name', item_id)
def get_item_pgp_mail(item_id):
return _get_item_correlation('pgpdump', 'mail', item_id)
def get_item_pgp_correlation(item_id):
pass
class Domain(object):
"""docstring for Domain."""
def __init__(self, domain, port=80):
self.domain = str(domain)
## TODO: handle none port
self.type = get_domain_type(domain)

View File

@ -125,7 +125,6 @@ def get_item(request_dict):
###
def _get_item_correlation(correlation_name, correlation_type, item_id):
print('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
if res:
return list(res)
@ -144,6 +143,8 @@ def get_item_pgp_name(item_id):
def get_item_pgp_mail(item_id):
return _get_item_correlation('pgpdump', 'mail', item_id)
def get_item_pgp_correlation(item_id):
pass
###
### GET Internal Module DESC
@ -153,3 +154,29 @@ def get_item_list_desc(list_item_id):
for item_id in list_item_id:
desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} )
return desc_list
# # TODO: add an option to check the tag
def is_crawled(item_id):
return item_id.startswith('crawled')
def is_onion(item_id):
is_onion = False
if len(is_onion) > 62:
if is_crawled(item_id) and item_id[-42:-36] == '.onion':
is_onion = True
return is_onion
def is_item_in_domain(domain, item_id):
is_in_domain = False
domain_lenght = len(domain)
if len(item_id) > (domain_lenght+48):
if item_id[-36-domain_lenght:-36] == domain:
is_in_domain = True
return is_in_domain
def get_item_domain(item_id):
return item_id[19:-36]
def get_item_children(item_id):
return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id)))

View File

@ -2,14 +2,18 @@
# -*-coding:UTF-8 -*
import os
import sys
import redis
from hashlib import sha256
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config
from Correlation import Correlation
r_serv_metadata = Flask_config.r_serv_metadata
from Correlation import Correlation
import Item
serv_metadata = Flask_config.r_serv_metadata
pgpdump = Correlation('pgpdump')
@ -23,3 +27,36 @@ def get_pgp(request_dict, pgp_type):
field_name = request_dict.get(pgp_type)
return pgpdump.get_correlation(request_dict, pgp_type, field_name)
def save_pgp_data(type_pgp, date, item_path, data):
# create basic medata
if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)):
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date)
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
else:
last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen')
if not last_seen:
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
else:
if int(last_seen) < int(date):
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
# global set
serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)
# daily
serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)
# all type
serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)
## object_metadata
# paste
serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)
# domain object
if Item.is_crawled(item_path):
domain = Item.get_item_domain(item_path)
serv_metadata.sadd('domain_pgpdump_{}:{}'.format(type_pgp, domain), data)
serv_metadata.sadd('set_domain_pgpdump_{}:{}'.format(type_pgp, data), domain)

View File

@ -121,6 +121,11 @@ def add_item_tag(tag, item_path):
r_serv_metadata.sadd('tag:{}'.format(item_path), tag)
r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path)
if Item.is_crawled(item_path):
domain = Item.get_item_domain(item_path)
r_serv_metadata.sadd('tag:{}'.format(domain), tag)
r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain)
r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1)
tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')