mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			153 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			153 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
#!/usr/bin/env python3
 | 
						|
# -*-coding:UTF-8 -*
 | 
						|
 | 
						|
import os
 | 
						|
import sys
 | 
						|
import time
 | 
						|
import redis
 | 
						|
import datetime
 | 
						|
import configparser
 | 
						|
 | 
						|
def substract_date(date_from, date_to):
    """Return every day between two dates, both ends included.

    Args:
        date_from: first day, as a 'YYYYMMDD' string.
        date_to: last day, as a 'YYYYMMDD' string.

    Returns:
        A list of 'YYYYMMDD' strings in chronological order. The list is
        empty when date_to is earlier than date_from.

    Raises:
        ValueError: if a field is not numeric or is not a valid calendar date.
    """
    first_day = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    last_day = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    nb_days = (last_day - first_day).days
    # range() is empty for a negative span, so a reversed interval yields [].
    return [
        (first_day + datetime.timedelta(days=offset)).strftime('%Y%m%d')
        for offset in range(nb_days + 1)
    ]
 | 
						|
 | 
						|
def get_date_epoch(date):
    """Return the Unix timestamp (whole seconds) of midnight on the given day.

    Args:
        date: a day as a 'YYYYMMDD' string.

    Returns:
        The epoch as an int. The datetime is naive, so midnight is taken in
        the local timezone of the machine running the script.

    Raises:
        ValueError: if a field is not numeric or is not a valid calendar date.
    """
    year, month, day = int(date[0:4]), int(date[4:6]), int(date[6:8])
    return int(datetime.datetime(year, month, day).timestamp())
 | 
						|
 | 
						|
def get_domain_root_from_paste_childrens(item_father, domain):
    """Find the child item of item_father whose path contains domain.

    Reads the Redis set 'paste_children:<item_father>' through the
    module-level r_serv_metadata client. As a side effect, every member that
    contains the module-level PASTES_FOLDER string is replaced in that set by
    the same path with the first occurrence of PASTES_FOLDER removed.

    Args:
        item_father: identifier used to build the 'paste_children:' key.
        domain: substring looked for in each (cleaned) child path.

    Returns:
        The matching child path, or '' when no child contains domain. When
        several children match, the one iterated last is returned; the
        members come back as a set, so which one that is is arbitrary.
    """
    children_key = 'paste_children:{}'.format(item_father)
    domain_root = ''
    for item_path in r_serv_metadata.smembers(children_key):
        # remove absolute_path: swap the stored member for its relative form
        if PASTES_FOLDER in item_path:
            r_serv_metadata.srem(children_key, item_path)
            item_path = item_path.replace(PASTES_FOLDER, '', 1)
            r_serv_metadata.sadd(children_key, item_path)
        if domain in item_path:
            domain_root = item_path
    return domain_root
 | 
						|
 | 
						|
 | 
						|
if __name__ == '__main__':
    # One-shot migration of the onion crawl history stored in ARDB_Onion:
    #   1. drop the 'onion_history' keys of domains listed as down and not
    #      present in 'full_onion_up';
    #   2. for every domain in 'full_onion_up', convert each dated
    #      'onion_history' entry into a 'crawler_history_onion:<domain>:80'
    #      sorted-set entry and refresh the domain metadata.
    #
    # NOTE: this code must stay at module level (not inside a main()):
    # get_domain_root_from_paste_childrens() reads PASTES_FOLDER and
    # r_serv_metadata as module globals.

    start_deb = time.time()

    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        # Adjacent literals instead of backslash continuation, so the message
        # does not embed the source indentation.
        raise Exception('Unable to find the configuration file. '
                        'Did you set environment variables? '
                        'Or activate the virtualenv.')

    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'

    r_serv = redis.StrictRedis(
        host=cfg.get("ARDB_DB", "host"),
        port=cfg.getint("ARDB_DB", "port"),
        db=cfg.getint("ARDB_DB", "db"),
        decode_responses=True)

    r_serv_metadata = redis.StrictRedis(
        host=cfg.get("ARDB_Metadata", "host"),
        port=cfg.getint("ARDB_Metadata", "port"),
        db=cfg.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    # Not used below in this file; kept so the module-level name still exists.
    r_serv_tag = redis.StrictRedis(
        host=cfg.get("ARDB_Tags", "host"),
        port=cfg.getint("ARDB_Tags", "port"),
        db=cfg.getint("ARDB_Tags", "db"),
        decode_responses=True)

    r_serv_onion = redis.StrictRedis(
        host=cfg.get("ARDB_Onion", "host"),
        port=cfg.getint("ARDB_Onion", "port"),
        db=cfg.getint("ARDB_Onion", "db"),
        decode_responses=True)

    # Publish which background script is running and its progress (percent).
    r_serv.set('ail:current_background_script', 'onions')
    r_serv.set('ail:current_background_script_stat', 0)

    ## Update Onion ##
    print('Updating ARDB_Onion ...')
    start = time.time()

    # clean down domain from db
    date_from = '20180929'
    date_today = datetime.date.today().strftime("%Y%m%d")
    for date in substract_date(date_from, date_today):
        onion_down = r_serv_onion.smembers('onion_down:{}'.format(date))
        for onion_domain in onion_down:
            # Domains also recorded as up are handled by the second pass.
            if r_serv_onion.sismember('full_onion_up', onion_domain):
                continue
            # delete history
            history_key = 'onion_history:{}'.format(onion_domain)
            all_onion_history = r_serv_onion.lrange(history_key, 0, -1)
            if all_onion_history:
                for date_history in all_onion_history:
                    r_serv_onion.delete('{}:{}'.format(history_key, date_history))
                r_serv_onion.delete(history_key)

    # clean up domain
    all_domain_up = r_serv_onion.smembers('full_onion_up')

    # stats: count the snapshot that is actually iterated, so the percentage
    # always matches the loop and the divisor cannot be zero inside it.
    total_domain = len(all_domain_up)
    nb_updated = 0
    last_progress = 0

    for onion_domain in all_domain_up:
        history_key = 'onion_history:{}'.format(onion_domain)
        metadata_key = 'onion_metadata:{}'.format(onion_domain)

        all_onion_history = r_serv_onion.lrange(history_key, 0, -1)
        if all_onion_history:
            for date_history in all_onion_history:
                dated_key = '{}:{}'.format(history_key, date_history)
                print('--------')
                print(dated_key)
                # Only the first element of the dated list is used.
                item_father = r_serv_onion.lrange(dated_key, 0, 0)
                print('item_father: {}'.format(item_father))

                # delete old history (done whether or not the list had an entry)
                r_serv_onion.delete(dated_key)
                if not item_father:
                    continue
                item_father = item_father[0]

                # create new history
                root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
                if root_key:
                    epoch = get_date_epoch(date_history)
                    # NOTE(review): positional (score, member) is the redis-py 2.x
                    # StrictRedis.zadd form; redis-py >= 3.0 expects a mapping.
                    # Confirm the pinned client version before upgrading.
                    r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), epoch, root_key)
                    print('crawler_history_onion:{}:80   {}   {}'.format(onion_domain, epoch, root_key))
                    # update service metadata: paste_parent
                    r_serv_onion.hset(metadata_key, 'paste_parent', root_key)

            r_serv_onion.delete(history_key)

        r_serv_onion.hset(metadata_key, 'ports', '80')
        r_serv_onion.hdel(metadata_key, 'last_seen')

        nb_updated += 1
        progress = int((nb_updated * 100) / total_domain)
        print('{}/{}    updated    {}%'.format(nb_updated, total_domain, progress))
        # update progress stats (only write to Redis when the percentage changes)
        if progress != last_progress:
            r_serv.set('ail:current_background_script_stat', progress)
            last_progress = progress

    end = time.time()
    # Report the number of domains processed; the previous counter was never
    # incremented and always printed 0.
    print('Updating ARDB_Onion Done => {} domains: {} s'.format(nb_updated, end - start))
    print()
    print('Done in {} s'.format(end - start_deb))

    # Record that the 'onions' part of the v1.5 update has been applied.
    r_serv.sadd('ail:update_v1.5', 'onions')
 |