mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			chg: [Domain] add domain object: tag + correlation (decoded items + tags + pgp + cryptocurrency)
							parent
							
								
									e759b560db
								
							
						
					
					
						commit
						48abb89d28
					
				
							
								
								
									
										37
									
								
								OVERVIEW.md
								
								
								
								
							
							
						
						
									
										37
									
								
								OVERVIEW.md
								
								
								
								
							| 
						 | 
					@ -261,6 +261,9 @@ Redis and ARDB overview
 | 
				
			||||||
| set_pgpdump_name:*name* | *item_path* |
 | 
					| set_pgpdump_name:*name* | *item_path* |
 | 
				
			||||||
| | |
 | 
					| | |
 | 
				
			||||||
| set_pgpdump_mail:*mail* | *item_path* |
 | 
					| set_pgpdump_mail:*mail* | *item_path* |
 | 
				
			||||||
 | 
					| | |
 | 
				
			||||||
 | 
					| | |
 | 
				
			||||||
 | 
					| set_domain_pgpdump_**pgp_type**:**key** | **domain** |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
##### Hset date:
 | 
					##### Hset date:
 | 
				
			||||||
| Key | Field | Value |
 | 
					| Key | Field | Value |
 | 
				
			||||||
| 
						 | 
					@ -288,11 +291,20 @@ Redis and ARDB overview
 | 
				
			||||||
| item_pgpdump_name:*item_path* | *name* |
 | 
					| item_pgpdump_name:*item_path* | *name* |
 | 
				
			||||||
| | |
 | 
					| | |
 | 
				
			||||||
| item_pgpdump_mail:*item_path* | *mail* |
 | 
					| item_pgpdump_mail:*item_path* | *mail* |
 | 
				
			||||||
 | 
					| | |
 | 
				
			||||||
 | 
					| | |
 | 
				
			||||||
 | 
					| domain_pgpdump_**pgp_type**:**domain** | **key** |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#### Cryptocurrency
 | 
					#### Cryptocurrency
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Supported cryptocurrency:
 | 
					Supported cryptocurrency:
 | 
				
			||||||
- bitcoin
 | 
					- bitcoin
 | 
				
			||||||
 | 
					- bitcoin-cash
 | 
				
			||||||
 | 
					- dash
 | 
				
			||||||
 | 
					- etherum
 | 
				
			||||||
 | 
					- litecoin
 | 
				
			||||||
 | 
					- monero
 | 
				
			||||||
 | 
					- zcash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
##### Hset:
 | 
					##### Hset:
 | 
				
			||||||
| Key | Field | Value |
 | 
					| Key | Field | Value |
 | 
				
			||||||
| 
						 | 
					@ -303,7 +315,8 @@ Supported cryptocurrency:
 | 
				
			||||||
##### set:
 | 
					##### set:
 | 
				
			||||||
| Key | Value |
 | 
					| Key | Value |
 | 
				
			||||||
| ------ | ------ |
 | 
					| ------ | ------ |
 | 
				
			||||||
| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** |
 | 
					| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** | PASTE
 | 
				
			||||||
 | 
					| set_domain_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **domain** | DOMAIN
 | 
				
			||||||
 | 
					
 | 
				
			||||||
##### Hset date:
 | 
					##### Hset date:
 | 
				
			||||||
| Key | Field | Value |
 | 
					| Key | Field | Value |
 | 
				
			||||||
| 
						 | 
					@ -318,8 +331,14 @@ Supported cryptocurrency:
 | 
				
			||||||
##### set:
 | 
					##### set:
 | 
				
			||||||
| Key | Value |
 | 
					| Key | Value |
 | 
				
			||||||
| ------ | ------ |
 | 
					| ------ | ------ |
 | 
				
			||||||
| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** |
 | 
					| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | PASTE
 | 
				
			||||||
 | 
					| domain_cryptocurrency_**cryptocurrency name**:**domain** | **cryptocurrency address** | DOMAIN
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### HASH
 | 
				
			||||||
 | 
					| Key | Value |
 | 
				
			||||||
 | 
					| ------ | ------ |
 | 
				
			||||||
 | 
					| hash_domain:**domain** | **hash** |
 | 
				
			||||||
 | 
					| domain_hash:**hash** | **domain** |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## DB9 - Crawler:
 | 
					## DB9 - Crawler:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -362,6 +381,20 @@ Supported cryptocurrency:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					##### CRAWLER QUEUES:
 | 
				
			||||||
 | 
					| SET - Key | Value |
 | 
				
			||||||
 | 
					| ------ | ------ |
 | 
				
			||||||
 | 
					| onion_crawler_queue | **url**;**item_id** | RE-CRAWL
 | 
				
			||||||
 | 
					| regular_crawler_queue | - |
 | 
				
			||||||
 | 
					|  |  |
 | 
				
			||||||
 | 
					| onion_crawler_priority_queue   | **url**;**item_id** | USER
 | 
				
			||||||
 | 
					| regular_crawler_priority_queue | - |
 | 
				
			||||||
 | 
					|  |  |
 | 
				
			||||||
 | 
					| onion_crawler_discovery_queue   | **url**;**item_id** | DISCOVER
 | 
				
			||||||
 | 
					| regular_crawler_discovery_queue | - |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					##### TO CHANGE:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ARDB overview
 | 
					ARDB overview
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	----------------------------------------- SENTIMENT ------------------------------------
 | 
						----------------------------------------- SENTIMENT ------------------------------------
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -18,6 +18,7 @@ from pubsublogger import publisher
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from Helper import Process
 | 
					from Helper import Process
 | 
				
			||||||
from packages import Paste
 | 
					from packages import Paste
 | 
				
			||||||
 | 
					from packages import Item
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import signal
 | 
					import signal
 | 
				
			||||||
| 
						 | 
					@ -120,6 +121,12 @@ def save_hash(decoder_name, message, date, decoded):
 | 
				
			||||||
    serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
 | 
					    serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
 | 
				
			||||||
    serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
 | 
					    serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Domain Object
 | 
				
			||||||
 | 
					    if Item.is_crawled(message):
 | 
				
			||||||
 | 
					        domain = Item.get_item_domain(message)
 | 
				
			||||||
 | 
					        serv_metadata.sadd('hash_domain:{}'.format(domain), hash) # domain - hash map
 | 
				
			||||||
 | 
					        serv_metadata.sadd('domain_hash:{}'.format(hash), domain) # hash - domain map
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def save_hash_on_disk(decode, type, hash, json_data):
 | 
					def save_hash_on_disk(decode, type, hash, json_data):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -21,6 +21,8 @@ from bs4 import BeautifulSoup
 | 
				
			||||||
from Helper import Process
 | 
					from Helper import Process
 | 
				
			||||||
from packages import Paste
 | 
					from packages import Paste
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from packages import Pgp
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TimeoutException(Exception):
 | 
					class TimeoutException(Exception):
 | 
				
			||||||
    pass
 | 
					    pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -117,31 +119,6 @@ def extract_id_from_output(pgp_dump_outpout):
 | 
				
			||||||
        key_id = key_id.replace(key_id_str, '', 1)
 | 
					        key_id = key_id.replace(key_id_str, '', 1)
 | 
				
			||||||
        set_key.add(key_id)
 | 
					        set_key.add(key_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def save_pgp_data(type_pgp, date, item_path, data):
 | 
					 | 
				
			||||||
    # create basic medata
 | 
					 | 
				
			||||||
    if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)):
 | 
					 | 
				
			||||||
        serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date)
 | 
					 | 
				
			||||||
        serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen')
 | 
					 | 
				
			||||||
        if not last_seen:
 | 
					 | 
				
			||||||
            serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            if int(last_seen) < int(date):
 | 
					 | 
				
			||||||
                serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # global set
 | 
					 | 
				
			||||||
    serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # daily
 | 
					 | 
				
			||||||
    serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # all type
 | 
					 | 
				
			||||||
    serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # item_metadata
 | 
					 | 
				
			||||||
    serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
 | 
					    # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
 | 
				
			||||||
| 
						 | 
					@ -236,12 +213,12 @@ if __name__ == '__main__':
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for key_id in set_key:
 | 
					        for key_id in set_key:
 | 
				
			||||||
            print(key_id)
 | 
					            print(key_id)
 | 
				
			||||||
            save_pgp_data('key', date, message, key_id)
 | 
					            Pgp.save_pgp_data('key', date, message, key_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for name_id in set_name:
 | 
					        for name_id in set_name:
 | 
				
			||||||
            print(name_id)
 | 
					            print(name_id)
 | 
				
			||||||
            save_pgp_data('name', date, message, name_id)
 | 
					            Pgp.save_pgp_data('name', date, message, name_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for mail_id in set_mail:
 | 
					        for mail_id in set_mail:
 | 
				
			||||||
            print(mail_id)
 | 
					            print(mail_id)
 | 
				
			||||||
            save_pgp_data('mail', date, message, mail_id)
 | 
					            Pgp.save_pgp_data('mail', date, message, mail_id)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -16,6 +16,8 @@ import datetime
 | 
				
			||||||
from pubsublogger import publisher
 | 
					from pubsublogger import publisher
 | 
				
			||||||
from Helper import Process
 | 
					from Helper import Process
 | 
				
			||||||
from packages import Paste
 | 
					from packages import Paste
 | 
				
			||||||
 | 
					from packages import Item
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_item_date(item_filename):
 | 
					def get_item_date(item_filename):
 | 
				
			||||||
    l_directory = item_filename.split('/')
 | 
					    l_directory = item_filename.split('/')
 | 
				
			||||||
| 
						 | 
					@ -84,6 +86,12 @@ if __name__ == '__main__':
 | 
				
			||||||
                set_tag_metadata(tag, item_date)
 | 
					                set_tag_metadata(tag, item_date)
 | 
				
			||||||
            server_metadata.sadd('tag:{}'.format(path), tag)
 | 
					            server_metadata.sadd('tag:{}'.format(path), tag)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # Domain Object
 | 
				
			||||||
 | 
					            if Item.is_crawled(path):
 | 
				
			||||||
 | 
					                domain = Item.get_item_domain(path)
 | 
				
			||||||
 | 
					                server_metadata.sadd('tag:{}'.format(domain), tag)
 | 
				
			||||||
 | 
					                server.sadd('domain:{}:{}'.format(tag, item_date), domain)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            curr_date = datetime.date.today().strftime("%Y%m%d")
 | 
					            curr_date = datetime.date.today().strftime("%Y%m%d")
 | 
				
			||||||
            server.hincrby('daily_tags:{}'.format(item_date), tag, 1)
 | 
					            server.hincrby('daily_tags:{}'.format(item_date), tag, 1)
 | 
				
			||||||
            p.populate_set_out(message, 'MISP_The_Hive_feeder')
 | 
					            p.populate_set_out(message, 'MISP_The_Hive_feeder')
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,8 +2,10 @@
 | 
				
			||||||
# -*-coding:UTF-8 -*
 | 
					# -*-coding:UTF-8 -*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
import redis
 | 
					import redis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
 | 
				
			||||||
import Flask_config
 | 
					import Flask_config
 | 
				
			||||||
 | 
					
 | 
				
			||||||
r_serv_metadata = Flask_config.r_serv_metadata
 | 
					r_serv_metadata = Flask_config.r_serv_metadata
 | 
				
			||||||
| 
						 | 
					@ -14,9 +16,11 @@ class Correlation(object):
 | 
				
			||||||
    def __init__(self, correlation_name):
 | 
					    def __init__(self, correlation_name):
 | 
				
			||||||
        self.correlation_name = correlation_name
 | 
					        self.correlation_name = correlation_name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _exist_corelation_field(self, correlation_type, field_name):
 | 
					    def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'):
					        """Check whether a correlation set is stored for ``field_name``.

					        With ``item_type='paste'`` the key looked up is
					        ``set_<correlation_name>_<correlation_type>:<field_name>``; for any
					        other value it is ``set_domain_<correlation_name>_<correlation_type>:<field_name>``.

					        Returns whatever ``r_serv_metadata.exists`` returns for that key.
					        """
					        # Compare the parameter, not the builtin ``type`` (which never
					        # equals 'paste' and silently forced the domain branch).
					        if item_type == 'paste':
					            key = 'set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)
					        else:
					            key = 'set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)
					        return r_serv_metadata.exists(key)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _get_items(self, correlation_type, field_name):
 | 
					    def _get_items(self, correlation_type, field_name):
 | 
				
			||||||
        res =  r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
 | 
					        res =  r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
 | 
				
			||||||
| 
						 | 
					@ -25,6 +29,12 @@ class Correlation(object):
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            return []
 | 
					            return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _get_domains(self, correlation_type, field_name):
 | 
				
			||||||
 | 
					        res =  r_serv_metadata.smembers('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
 | 
				
			||||||
 | 
					        if res:
 | 
				
			||||||
 | 
					            return list(res)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _get_metadata(self, correlation_type, field_name):
 | 
					    def _get_metadata(self, correlation_type, field_name):
 | 
				
			||||||
        meta_dict = {}
 | 
					        meta_dict = {}
 | 
				
			||||||
| 
						 | 
					@ -35,14 +45,14 @@ class Correlation(object):
 | 
				
			||||||
    def _get_correlation_by_date(self, correlation_type, date):
 | 
					    def _get_correlation_by_date(self, correlation_type, date):
 | 
				
			||||||
        return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date))
 | 
					        return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def verify_correlation_field_request(self, request_dict, correlation_type):
 | 
					    def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'):
 | 
				
			||||||
        if not request_dict:
 | 
					        if not request_dict:
 | 
				
			||||||
            return Response({'status': 'error', 'reason': 'Malformed JSON'}, 400)
 | 
					            return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        field_name = request_dict.get(correlation_type, None)
 | 
					        field_name = request_dict.get(correlation_type, None)
 | 
				
			||||||
        if not field_name:
 | 
					        if not field_name:
 | 
				
			||||||
            return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 )
 | 
					            return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 )
 | 
				
			||||||
        if not self._exist_corelation_field(correlation_type, field_name):
 | 
					        if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type):
 | 
				
			||||||
            return ( {'status': 'error', 'reason': 'Item not found'}, 404 )
 | 
					            return ( {'status': 'error', 'reason': 'Item not found'}, 404 )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_correlation(self, request_dict, correlation_type, field_name):
 | 
					    def get_correlation(self, request_dict, correlation_type, field_name):
 | 
				
			||||||
| 
						 | 
					@ -58,7 +68,37 @@ class Correlation(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return (dict_resp, 200)
 | 
					        return (dict_resp, 200)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_correlation_domain(self, request_dict, correlation_type, field_name):
 | 
				
			||||||
 | 
					        dict_resp = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        dict_resp['domain'] = self._get_domains(correlation_type, field_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        #if request_dict.get('metadata'):
 | 
				
			||||||
 | 
					        #    dict_resp['metadata'] = self._get_metadata(correlation_type, field_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#cryptocurrency_all:cryptocurrency name	cryptocurrency address	nb seen
 | 
					        dict_resp[correlation_type] = field_name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return (dict_resp, 200)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					######## INTERNAL ########
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_domain_correlation_obj(correlation_name, correlation_type, domain):
					    """Return the members of ``domain_<correlation_name>_<correlation_type>:<domain>``.

					    The set is read from ``r_serv_metadata``; the result is a list, empty
					    when the set has no members.
					    """
					    # No debug print of the key here: the item-level helper dropped its
					    # equivalent print, and this one only polluted stdout on every lookup.
					    key = 'domain_{}_{}:{}'.format(correlation_name, correlation_type, domain)
					    res = r_serv_metadata.smembers(key)
					    if res:
					        return list(res)
					    else:
					        return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					########  ########
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					######## API EXPOSED ########
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_domain_correlation_obj(request_dict, correlation_name, correlation_type, domain):
 | 
				
			||||||
 | 
					    dict_resp = {}
 | 
				
			||||||
 | 
					    dict_resp[correlation_type] = _get_domain_correlation_obj(correlation_name, correlation_type, domain)
 | 
				
			||||||
 | 
					    dict_resp['domain'] = domain
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return (dict_resp, 200)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					########  ########
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,11 +10,13 @@ from hashlib import sha256
 | 
				
			||||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
 | 
					sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
 | 
				
			||||||
import Flask_config
 | 
					import Flask_config
 | 
				
			||||||
from Correlation import Correlation
 | 
					from Correlation import Correlation
 | 
				
			||||||
 | 
					import Item
 | 
				
			||||||
 | 
					
 | 
				
			||||||
r_serv_metadata = Flask_config.r_serv_metadata
 | 
					r_serv_metadata = Flask_config.r_serv_metadata
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					all_cryptocurrency = ['bitcoin', 'etherum']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
 | 
					digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
 | 
				
			||||||
#address_validation = {'bitcoin': 'base58', 'dash': 'base58'}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
cryptocurrency = Correlation('cryptocurrency')
 | 
					cryptocurrency = Correlation('cryptocurrency')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -52,6 +54,21 @@ def get_cryptocurrency(request_dict, cryptocurrency_type):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name)
 | 
					    return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# # TODO:  add get all cryptocurrency option
 | 
				
			||||||
 | 
					def get_cryptocurrency_domain(request_dict, cryptocurrency_type):
 | 
				
			||||||
 | 
					    res = cryptocurrency.verify_correlation_field_request(request_dict, cryptocurrency_type, item_type='domain')
 | 
				
			||||||
 | 
					    if res:
 | 
				
			||||||
 | 
					        return res
 | 
				
			||||||
 | 
					    field_name = request_dict.get(cryptocurrency_type)
 | 
				
			||||||
 | 
					    if not verify_cryptocurrency_address(cryptocurrency_type, field_name):
 | 
				
			||||||
 | 
					        return ( {'status': 'error', 'reason': 'Invalid Cryptocurrency address'}, 400 )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return cryptocurrency.get_correlation_domain(request_dict, cryptocurrency_type, field_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_domain_cryptocurrency(request_dict, cryptocurrency_type):
					    """Return the ``cryptocurrency_type`` addresses correlated with a domain.

					    Returns a ``(response_dict, http_status)`` tuple, like the other API
					    helpers in this module. Error tuples reuse the messages produced by
					    ``Correlation.verify_correlation_field_request``.
					    """
					    if not request_dict:
					        return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)

					    # NOTE(review): assumes the domain is sent under the 'domain' key --
					    # confirm against the API caller. The previous body referenced
					    # undefined names (``self``, ``domain``) and could only raise NameError.
					    domain = request_dict.get('domain', None)
					    if not domain:
					        return ({'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400)

					    # ``get_domain_correlation_obj`` is a module-level function of
					    # Correlation.py, not a method of the Correlation instance; import it
					    # locally because this module only imports the class.
					    from Correlation import get_domain_correlation_obj
					    return get_domain_correlation_obj(request_dict, cryptocurrency.correlation_name, cryptocurrency_type, domain)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address):
 | 
					def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address):
 | 
				
			||||||
    # create basic medata
 | 
					    # create basic medata
 | 
				
			||||||
    if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)):
 | 
					    if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)):
 | 
				
			||||||
| 
						 | 
					@ -65,7 +82,8 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
 | 
				
			||||||
            if int(last_seen) < int(date):
 | 
					            if int(last_seen) < int(date):
 | 
				
			||||||
                r_serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date)
 | 
					                r_serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # global set
 | 
					    ## global set
 | 
				
			||||||
 | 
					    # item
 | 
				
			||||||
    r_serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path)
 | 
					    r_serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # daily
 | 
					    # daily
 | 
				
			||||||
| 
						 | 
					@ -74,5 +92,12 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
 | 
				
			||||||
    # all type
 | 
					    # all type
 | 
				
			||||||
    r_serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1)
 | 
					    r_serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # item_metadata
 | 
					    ## object_metadata
 | 
				
			||||||
 | 
					    # item
 | 
				
			||||||
    r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address)
 | 
					    r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # domain
 | 
				
			||||||
 | 
					    if Item.is_crawled(item_path):
 | 
				
			||||||
 | 
					        domain = Item.get_item_domain(item_path)
 | 
				
			||||||
 | 
					        r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address)
 | 
				
			||||||
 | 
					        r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,85 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					The ``Domain``
 | 
				
			||||||
 | 
					===================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import redis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import Item
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
 | 
				
			||||||
 | 
					import Flask_config
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					r_serv_onion = Flask_config.r_serv_onion
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_domain_type(domain):
 | 
				
			||||||
 | 
					    if str(domain).endswith('.onion'):
 | 
				
			||||||
 | 
					        return 'onion'
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        return 'regular'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_all_domain_up_by_type(domain_type):
					    """List the domains stored in the ``full_<domain_type>_up`` set.

					    Returns ``({'type': ..., 'domains': [...]}, 200)`` for a known domain
					    type and an error tuple with status 400 otherwise.
					    """
					    # The accepted types are the two values get_domain_type() can return;
					    # the previous check used an undefined name ``domains``.
					    if domain_type in ('onion', 'regular'):
					        list_domain = list(r_serv_onion.smembers('full_{}_up'.format(domain_type)))
					        return ({'type': domain_type, 'domains': list_domain}, 200)
					    else:
					        return ({"status": "error", "reason": "Invalid domain type"}, 400)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_domain_items(domain, root_item_id):
 | 
				
			||||||
 | 
					    dom_item =  get_domain_item_children(domain, root_item_id)
 | 
				
			||||||
 | 
					    dom_item.append(root_item_id)
 | 
				
			||||||
 | 
					    return dom_item
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_domain_item_children(domain, root_item_id):
 | 
				
			||||||
 | 
					    all_items = []
 | 
				
			||||||
 | 
					    for item_id in Item.get_item_children(root_item_id):
 | 
				
			||||||
 | 
					        if Item.is_item_in_domain(domain, item_id):
 | 
				
			||||||
 | 
					            all_items.append(item_id)
 | 
				
			||||||
 | 
					            all_items.extend(get_domain_item_children(domain, item_id))
 | 
				
			||||||
 | 
					    return all_items
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_link_tree():
 | 
				
			||||||
 | 
					    pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					###
 | 
				
			||||||
 | 
					### correlation
 | 
				
			||||||
 | 
					###
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_domain_correlation(domain, correlation_name=None, correlation_type=None):
					    """Return the members of ``domain_<correlation_name>_<correlation_type>:<domain>``.

					    The result is a list, empty when the set has no members.
					    """
					    # This module only binds r_serv_onion, so reach the metadata server
					    # through Flask_config. The key uses the ``domain_`` prefix and the
					    # ``domain`` argument; the previous body was copied from the item
					    # helper and referenced an undefined ``item_id``.
					    key = 'domain_{}_{}:{}'.format(correlation_name, correlation_type, domain)
					    res = Flask_config.r_serv_metadata.smembers(key)
					    if res:
					        return list(res)
					    else:
					        return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_item_bitcoin(item_id):
					    """Return the bitcoin addresses correlated with ``item_id``."""
					    # The lookup helper is defined in the Item module, not in this one.
					    return Item._get_item_correlation('cryptocurrency', 'bitcoin', item_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_item_pgp_key(item_id):
					    """Return the PGP key ids correlated with ``item_id``."""
					    # The lookup helper is defined in the Item module, not in this one.
					    return Item._get_item_correlation('pgpdump', 'key', item_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_item_pgp_name(item_id):
					    """Return the PGP names correlated with ``item_id``."""
					    # The lookup helper is defined in the Item module, not in this one.
					    return Item._get_item_correlation('pgpdump', 'name', item_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_item_pgp_mail(item_id):
					    """Return the PGP mail addresses correlated with ``item_id``."""
					    # The lookup helper is defined in the Item module, not in this one.
					    return Item._get_item_correlation('pgpdump', 'mail', item_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_item_pgp_correlation(item_id):
 | 
				
			||||||
 | 
					    pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Domain(object):
 | 
				
			||||||
 | 
					    """docstring for Domain."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __init__(self, domain, port=80):
 | 
				
			||||||
 | 
					        self.domain = str(domain)
 | 
				
			||||||
 | 
					        ## TODO: handle none port
 | 
				
			||||||
 | 
					        self.type = get_domain_type(domain)
 | 
				
			||||||
| 
						 | 
					@ -125,7 +125,6 @@ def get_item(request_dict):
 | 
				
			||||||
###
 | 
					###
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _get_item_correlation(correlation_name, correlation_type, item_id):
 | 
					def _get_item_correlation(correlation_name, correlation_type, item_id):
 | 
				
			||||||
    print('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
 | 
					 | 
				
			||||||
    res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
 | 
					    res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
 | 
				
			||||||
    if res:
 | 
					    if res:
 | 
				
			||||||
        return list(res)
 | 
					        return list(res)
 | 
				
			||||||
| 
						 | 
					@ -144,6 +143,8 @@ def get_item_pgp_name(item_id):
 | 
				
			||||||
def get_item_pgp_mail(item_id):
 | 
					def get_item_pgp_mail(item_id):
 | 
				
			||||||
    return _get_item_correlation('pgpdump', 'mail', item_id)
 | 
					    return _get_item_correlation('pgpdump', 'mail', item_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_item_pgp_correlation(item_id):
					    """Placeholder with no implementation yet; item_id is ignored and None is returned."""
					    return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
###
 | 
					###
 | 
				
			||||||
### GET Internal Module DESC
 | 
					### GET Internal Module DESC
 | 
				
			||||||
| 
						 | 
					@ -153,3 +154,29 @@ def get_item_list_desc(list_item_id):
 | 
				
			||||||
    for item_id in list_item_id:
 | 
					    for item_id in list_item_id:
 | 
				
			||||||
        desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} )
 | 
					        desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} )
 | 
				
			||||||
    return desc_list
 | 
					    return desc_list
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# # TODO: add an option to check the tag
 | 
				
			||||||
 | 
					def is_crawled(item_id):
					    """Return True when item_id begins with the 'crawled' prefix, False otherwise."""
					    crawled_prefix = 'crawled'
					    return item_id.startswith(crawled_prefix)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def is_onion(item_id):
					    """Tell whether item_id is a crawled item whose domain ends in '.onion'.
					
					    The six characters that precede the final 36 characters of the id
					    (presumably a UUID suffix - confirm against the crawler) must be '.onion',
					    and the id must satisfy is_crawled().
					
					    The length guard used to call len() on the boolean result flag, which
					    raised TypeError on every call; it now measures item_id.
					
					    Returns:
					        bool: True for a crawled '.onion' item, False otherwise.
					    """
					    # ids of 62 characters or fewer are rejected before any slicing
					    if len(item_id) <= 62:
					        return False
					    # cheap suffix comparison first, then the crawled-prefix check
					    return item_id[-42:-36] == '.onion' and is_crawled(item_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def is_item_in_domain(domain, item_id):
					    """Check whether domain sits immediately before the last 36 characters of item_id.
					
					    Ids that are not longer than len(domain) + 48 characters are never a match.
					    """
					    domain_len = len(domain)
					    if len(item_id) <= domain_len + 48:
					        return False
					    # the domain is expected right before the trailing 36-character suffix
					    return item_id[-36 - domain_len:-36] == domain
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_item_domain(item_id):
					    """Return item_id without its first 19 and its last 36 characters.
					
					    Presumably the leading 'crawled/YYYY/MM/DD/' part and a trailing UUID are
					    what get stripped - confirm against the crawler's naming scheme.
					    """
					    prefix_len, suffix_len = 19, 36
					    return item_id[prefix_len:-suffix_len]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_item_children(item_id):
					    """Return the members of the 'paste_children:<item_id>' set as a list."""
					    children_key = 'paste_children:{}'.format(item_id)
					    children = r_serv_metadata.smembers(children_key)
					    return list(children)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,14 +2,18 @@
 | 
				
			||||||
# -*-coding:UTF-8 -*
 | 
					# -*-coding:UTF-8 -*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
import redis
 | 
					import redis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from hashlib import sha256
 | 
					from hashlib import sha256
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
 | 
				
			||||||
import Flask_config
 | 
					import Flask_config
 | 
				
			||||||
from Correlation import Correlation
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
r_serv_metadata = Flask_config.r_serv_metadata
 | 
					from Correlation import Correlation
 | 
				
			||||||
 | 
					import Item
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					serv_metadata = Flask_config.r_serv_metadata
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pgpdump = Correlation('pgpdump')
 | 
					pgpdump = Correlation('pgpdump')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -23,3 +27,36 @@ def get_pgp(request_dict, pgp_type):
 | 
				
			||||||
    field_name = request_dict.get(pgp_type)
 | 
					    field_name = request_dict.get(pgp_type)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return pgpdump.get_correlation(request_dict, pgp_type, field_name)
 | 
					    return pgpdump.get_correlation(request_dict, pgp_type, field_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def save_pgp_data(type_pgp, date, item_path, data):
					    """Record that the PGP value data (of kind type_pgp) was seen in item_path on date.
					
					    Writes to serv_metadata: the first_seen/last_seen hash of the value, the
					    set of items containing it, the per-date and overall counters, the item's
					    own set of values and, for crawled items only, the two sets linking the
					    item's domain and the value. Returns None.
					    """
					    metadata_key = 'pgpdump_metadata_{}:{}'.format(type_pgp, data)
					
					    # create basic metadata
					    if serv_metadata.exists(metadata_key):
					        previous_last_seen = serv_metadata.hget(metadata_key, 'last_seen')
					        # last_seen is filled in when missing and otherwise only moves forward
					        if not previous_last_seen or int(previous_last_seen) < int(date):
					            serv_metadata.hset(metadata_key, 'last_seen', date)
					    else:
					        serv_metadata.hset(metadata_key, 'first_seen', date)
					        serv_metadata.hset(metadata_key, 'last_seen', date)
					
					    # global set
					    serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)
					
					    # daily
					    serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)
					
					    # all type
					    serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)
					
					    ## object_metadata
					    # paste
					    serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)
					
					    # domain object: only crawled items are linked to a domain
					    if not Item.is_crawled(item_path):
					        return
					    domain = Item.get_item_domain(item_path)
					    serv_metadata.sadd('domain_pgpdump_{}:{}'.format(type_pgp, domain), data)
					    serv_metadata.sadd('set_domain_pgpdump_{}:{}'.format(type_pgp, data), domain)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -121,6 +121,11 @@ def add_item_tag(tag, item_path):
 | 
				
			||||||
    r_serv_metadata.sadd('tag:{}'.format(item_path), tag)
 | 
					    r_serv_metadata.sadd('tag:{}'.format(item_path), tag)
 | 
				
			||||||
    r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path)
 | 
					    r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if Item.is_crawled(item_path):
 | 
				
			||||||
 | 
					        domain = Item.get_item_domain(item_path)
 | 
				
			||||||
 | 
					        r_serv_metadata.sadd('tag:{}'.format(domain), tag)
 | 
				
			||||||
 | 
					        r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1)
 | 
					    r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')
 | 
					    tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue