mirror of https://github.com/CIRCL/AIL-framework
fix: [v1.5 background update screenshot] remove duplicate files
parent
499c07a93e
commit
41e6b4ec59
|
@ -36,22 +36,22 @@ if __name__ == "__main__":
|
|||
r_serv.set('ail:update_in_progress', 'v1.5')
|
||||
r_serv.set('ail:current_background_update', 'v1.5')
|
||||
if not r_serv.sismember('ail:update_v1.5', 'onions'):
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Onions.py')
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Onions.py')
|
||||
process = subprocess.run(['python' ,update_file])
|
||||
|
||||
if not r_serv.sismember('ail:update_v1.5', 'metadata'):
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Metadata.py')
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Metadata.py')
|
||||
process = subprocess.run(['python' ,update_file])
|
||||
|
||||
if not r_serv.sismember('ail:update_v1.5', 'tags'):
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Tags.py')
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Tags.py')
|
||||
process = subprocess.run(['python' ,update_file])
|
||||
|
||||
if not r_serv.sismember('ail:update_v1.5', 'tags_background'):
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Tags_background.py')
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Tags_background.py')
|
||||
process = subprocess.run(['python' ,update_file])
|
||||
if not r_serv.sismember('ail:update_v1.5', 'crawled_screenshot'):
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Onions_screenshots.py')
|
||||
update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Onions_screenshots.py')
|
||||
process = subprocess.run(['python' ,update_file])
|
||||
if r_serv.scard('ail:update_v1.5') != 5:
|
||||
r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script')
|
||||
|
|
|
@ -1,68 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import redis
|
||||
import datetime
|
||||
import configparser
|
||||
|
||||
def _strip_folder_from_set(r_conn, set_key, folder):
    """Rewrite the members of the Redis set *set_key* that contain *folder*.

    Every matching member is removed from the set and re-added to the SAME
    set with the first occurrence of *folder* deleted.

    Returns the number of members that were rewritten.
    """
    rewritten = 0
    for elem in r_conn.smembers(set_key):
        if folder in elem:
            r_conn.srem(set_key, elem)
            r_conn.sadd(set_key, elem.replace(folder, '', 1))
            rewritten += 1
    return rewritten


if __name__ == '__main__':

    start_deb = time.time()

    # Load the AIL configuration; it provides the pastes directory and the
    # ARDB connection settings used below.
    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        # Adjacent literals keep the message free of source indentation.
        raise Exception('Unable to find the configuration file. '
                        'Did you set environment variables? '
                        'Or activate the virtualenv.')
    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    # Path fragment (with trailing slash) that is stripped from queue entries.
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'

    r_serv = redis.StrictRedis(
        host=cfg.get("ARDB_DB", "host"),
        port=cfg.getint("ARDB_DB", "port"),
        db=cfg.getint("ARDB_DB", "db"),
        decode_responses=True)

    r_serv_onion = redis.StrictRedis(
        host=cfg.get("ARDB_Onion", "host"),
        port=cfg.getint("ARDB_Onion", "port"),
        db=cfg.getint("ARDB_Onion", "db"),
        decode_responses=True)

    print()
    print('Updating ARDB_Onion ...')
    index = 0
    start = time.time()

    # Update both crawler queues. Each queue is rewritten in place: the
    # previous code rewrote 'onion_crawler_queue' while iterating
    # 'onion_crawler_priority_queue', which left the stale entries in the
    # priority queue and moved the rewritten ones into the normal queue.
    for queue_key in ('onion_crawler_queue', 'onion_crawler_priority_queue'):
        index += _strip_folder_from_set(r_serv_onion, queue_key, PASTES_FOLDER)

    end = time.time()
    print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
    print()

    # Set current ail version
    r_serv.set('ail:version', 'v1.5')

    # Set current update_in_progress
    r_serv.set('ail:update_in_progress', 'v1.5')
    r_serv.set('ail:current_background_update', 'v1.5')

    # Record the date (YYYYMMDD) at which this update was applied
    r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d"))

    print('Done in {} s'.format(end - start_deb))
|
|
@ -1,60 +0,0 @@
|
|||
#!/bin/bash

# Update driver: stops AIL, runs the Redis update script, reinstalls two
# Python dependencies, refreshes the web third-party assets, relaunches AIL,
# runs the Python update script and finally shuts everything down again.

# Abort early when a required environment variable is missing.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
# The message previously named AIL_ARDB although AIL_BIN is the variable checked.
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e` below.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -k &
wait

echo ""
bash -c "bash ${AIL_HOME}/update/bin/Update_Redis.sh"
#bash -c "bash ${AIL_HOME}/update/bin/Update_ARDB.sh"

echo ""
echo -e $GREEN"Update DomainClassifier"$DEFAULT
echo ""
# The URL must be quoted: unquoted, the '&' ends the command and runs it in
# the background, and 'egg=pybgpranking' becomes a plain shell assignment
# instead of being part of the URL fragment handed to pip.
pip3 install --upgrade --force-reinstall "git+https://github.com/D4-project/BGP-Ranking.git/@28013297efb039d2ebbce96ee2d89493f6ae56b0#subdirectory=client&egg=pybgpranking"
pip3 install --upgrade --force-reinstall git+https://github.com/adulau/DomainClassifier.git
wait
echo ""

echo ""
echo -e $GREEN"Update Web thirdparty"$DEFAULT
echo ""
# NOTE(review): no '/' between ${AIL_FLASK} and the script name here (nor
# after ${AIL_BIN} below) -- presumably these variables end with a slash; confirm.
bash ${AIL_FLASK}update_thirdparty.sh &
wait
echo ""

bash ${AIL_BIN}LAUNCH.sh -lav &
wait
echo ""

echo ""
echo -e $GREEN"Fixing ARDB ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v1.4/Update.py &
wait
echo ""
echo ""

echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -k &
wait

echo ""

exit 0
|
|
@ -100,6 +100,8 @@ if __name__ == '__main__':
|
|||
os.makedirs(dirname)
|
||||
if not os.path.exists(filename_img):
|
||||
os.rename(img_path, filename_img)
|
||||
else:
|
||||
os.remove(img_path)
|
||||
|
||||
item = os.path.join('crawled', date[0:4], date[4:6], date[6:8], file[:-4])
|
||||
# add item metadata
|
|
@ -5,6 +5,7 @@ import os
|
|||
import sys
|
||||
import time
|
||||
import redis
|
||||
import datetime
|
||||
import configparser
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -21,16 +22,10 @@ if __name__ == '__main__':
|
|||
|
||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'
|
||||
|
||||
r_serv_metadata = redis.StrictRedis(
|
||||
host=cfg.get("ARDB_Metadata", "host"),
|
||||
port=cfg.getint("ARDB_Metadata", "port"),
|
||||
db=cfg.getint("ARDB_Metadata", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
r_serv_tag = redis.StrictRedis(
|
||||
host=cfg.get("ARDB_Tags", "host"),
|
||||
port=cfg.getint("ARDB_Tags", "port"),
|
||||
db=cfg.getint("ARDB_Tags", "db"),
|
||||
r_serv = redis.StrictRedis(
|
||||
host=cfg.get("ARDB_DB", "host"),
|
||||
port=cfg.getint("ARDB_DB", "port"),
|
||||
db=cfg.getint("ARDB_DB", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
r_serv_onion = redis.StrictRedis(
|
||||
|
@ -39,189 +34,35 @@ if __name__ == '__main__':
|
|||
db=cfg.getint("ARDB_Onion", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
## Update metadata ##
|
||||
print('Updating ARDB_Metadata ...')
|
||||
index = 0
|
||||
start = time.time()
|
||||
|
||||
string_keys_to_rename = ['misp_events:{}*'.format(PASTES_FOLDER), 'hive_cases:{}*'.format(PASTES_FOLDER)]
|
||||
for key_to_rename in string_keys_to_rename:
|
||||
|
||||
keys_to_rename = []
|
||||
for key in r_serv_metadata.scan_iter(key_to_rename):
|
||||
new_key = key.replace(PASTES_FOLDER, '', 1)
|
||||
keys_to_rename.append( (key, new_key) )
|
||||
index = index + 1
|
||||
for key, new_key in keys_to_rename:
|
||||
r_serv_metadata.rename(key, new_key)
|
||||
|
||||
keys_to_rename = None
|
||||
|
||||
set_keys_to_rename = ['tag:{}*'.format(PASTES_FOLDER), 'hash_paste:{}*'.format(PASTES_FOLDER), 'base64_paste:{}*'.format(PASTES_FOLDER), 'binary_paste:{}*'.format(PASTES_FOLDER), 'hexadecimal_paste:{}*'.format(PASTES_FOLDER), 'paste_regular_external_links:{}*'.format(PASTES_FOLDER), 'paste_onion_external_links:{}*'.format(PASTES_FOLDER), 'paste_children:{}*'.format(PASTES_FOLDER)]
|
||||
for key_to_rename in set_keys_to_rename:
|
||||
|
||||
keys_to_remove = []
|
||||
keys_to_rename = []
|
||||
for key in r_serv_metadata.scan_iter(key_to_rename):
|
||||
new_key = key.replace(PASTES_FOLDER, '', 1)
|
||||
# a set with this key already exist
|
||||
if r_serv_metadata.exists(new_key):
|
||||
# save data
|
||||
for new_key_value in r_serv_metadata.smembers(key):
|
||||
r_serv_metadata.sadd(new_key, new_key_value)
|
||||
keys_to_remove.append(key)
|
||||
else:
|
||||
keys_to_rename.append( (key, new_key) )
|
||||
index = index + 1
|
||||
for key in keys_to_remove:
|
||||
r_serv_metadata.delete(key)
|
||||
for key, new_key in keys_to_rename:
|
||||
r_serv_metadata.rename(key, new_key)
|
||||
|
||||
keys_to_remove = None
|
||||
keys_to_rename = None
|
||||
|
||||
|
||||
zset_keys_to_rename = ['nb_seen_hash:*', 'base64_hash:*', 'binary_hash:*']
|
||||
for key_to_rename in zset_keys_to_rename:
|
||||
|
||||
keys_to_remove = []
|
||||
zkeys_to_remove = []
|
||||
keys_to_add = []
|
||||
for key in r_serv_metadata.scan_iter(key_to_rename):
|
||||
temp = []
|
||||
for zset_key, value in r_serv_metadata.zscan_iter(key, '*{}*'.format(PASTES_FOLDER)):
|
||||
new_key = zset_key.replace(PASTES_FOLDER, '', 1)
|
||||
index = index +1
|
||||
temp.append((key, zset_key))
|
||||
keys_to_add.append((key, new_key, value))
|
||||
if 0 < len(temp) < r_serv_metadata.zcard(key):
|
||||
zkeys_to_remove.extend(temp)
|
||||
else:
|
||||
keys_to_remove.append(key)
|
||||
for key in keys_to_remove:
|
||||
r_serv_metadata.delete(key)
|
||||
for key, zset_key in zkeys_to_remove:
|
||||
r_serv_metadata.zrem(key, zset_key)
|
||||
for key, new_key, value in keys_to_add:
|
||||
r_serv_metadata.zincrby(key, new_key, int(value))
|
||||
keys_to_remove = None
|
||||
zkeys_to_remove = None
|
||||
keys_to_add = None
|
||||
|
||||
set_keys_to_rename = ['paste_children:*']
|
||||
for key_to_rename in set_keys_to_rename:
|
||||
keys_to_remove = []
|
||||
skeys_to_remove = []
|
||||
keys_to_add = []
|
||||
for key in r_serv_metadata.scan_iter(key_to_rename):
|
||||
temp = []
|
||||
for set_key in r_serv_metadata.sscan_iter(key, '*{}*'.format(PASTES_FOLDER)):
|
||||
new_key = set_key.replace(PASTES_FOLDER, '', 1)
|
||||
index = index +1
|
||||
temp.append((key, set_key))
|
||||
keys_to_add.append((key, new_key))
|
||||
if 0 < len(temp) < r_serv_metadata.scard(key):
|
||||
skeys_to_remove.extend(temp)
|
||||
else:
|
||||
keys_to_remove.append(key)
|
||||
for key in keys_to_remove:
|
||||
r_serv_metadata.delete(key)
|
||||
for key, set_key in skeys_to_remove:
|
||||
r_serv_metadata.srem(key, set_key)
|
||||
for key, new_key in keys_to_add:
|
||||
r_serv_metadata.sadd(key, new_key)
|
||||
keys_to_remove = None
|
||||
skeys_to_remove = None
|
||||
keys_to_add = None
|
||||
|
||||
hset_keys_to_rename = ['paste_metadata:{}*'.format(PASTES_FOLDER)]
|
||||
for key_to_rename in hset_keys_to_rename:
|
||||
|
||||
keys_to_rename = []
|
||||
for key in r_serv_metadata.scan_iter(key_to_rename):
|
||||
new_key = key.replace(PASTES_FOLDER, '', 1)
|
||||
# a hset with this key already exist
|
||||
keys_to_rename.append((key, new_key))
|
||||
index = index + 1
|
||||
for key, new_key in keys_to_rename:
|
||||
r_serv_metadata.rename(key, new_key)
|
||||
keys_to_rename = None
|
||||
|
||||
# to verify 120/100 try with scan
|
||||
hset_keys_to_rename = ['paste_metadata:*']
|
||||
for key_to_rename in hset_keys_to_rename:
|
||||
for key in r_serv_metadata.scan_iter(key_to_rename):
|
||||
father = r_serv_metadata.hget(key, 'father')
|
||||
super_father = r_serv_metadata.hget(key, 'super_father')
|
||||
|
||||
if father:
|
||||
if PASTES_FOLDER in father:
|
||||
index = index + 1
|
||||
r_serv_metadata.hdel(key, 'father')
|
||||
r_serv_metadata.hset(key, 'father', father.replace(PASTES_FOLDER, '', 1))
|
||||
|
||||
if super_father:
|
||||
if PASTES_FOLDER in super_father:
|
||||
index = index + 1
|
||||
r_serv_metadata.hdel(key, 'super_father')
|
||||
r_serv_metadata.hset(key, 'super_father', super_father.replace(PASTES_FOLDER, '', 1))
|
||||
|
||||
keys_to_rename = None
|
||||
|
||||
|
||||
end = time.time()
|
||||
|
||||
print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start))
|
||||
|
||||
print()
|
||||
print('Updating ARDB_Tags ...')
|
||||
index = 0
|
||||
start = time.time()
|
||||
|
||||
tags_list = r_serv_tag.smembers('list_tags')
|
||||
for tag in tags_list:
|
||||
res = False
|
||||
|
||||
list_pastes = r_serv_tag.sscan(tag, 0, '*{}*'.format(PASTES_FOLDER), 1000)
|
||||
while list_pastes[1]:
|
||||
for paste in list_pastes[1]:
|
||||
r_serv_tag.srem(tag, paste)
|
||||
r_serv_tag.sadd(tag, paste.replace(PASTES_FOLDER, '', 1))
|
||||
index = index + 1
|
||||
|
||||
list_pastes = r_serv_tag.sscan(tag, 0, '*{}*'.format(PASTES_FOLDER), 1000)
|
||||
|
||||
end = time.time()
|
||||
print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))
|
||||
|
||||
print()
|
||||
print('Updating ARDB_Onion ...')
|
||||
index = 0
|
||||
start = time.time()
|
||||
|
||||
hset_keys_to_rename = ['onion_metadata:*']
|
||||
for key_to_rename in hset_keys_to_rename:
|
||||
for key in r_serv_onion.scan_iter(key_to_rename):
|
||||
list_data = r_serv_onion.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000)
|
||||
while list_data[1]:
|
||||
for hash_key, value in list_data[1].items():
|
||||
r_serv_onion.hdel(key, hash_key)
|
||||
new_hash = hash_key.replace(PASTES_FOLDER, '', 1)
|
||||
new_value = value.replace(PASTES_FOLDER, '', 1)
|
||||
index = index +1
|
||||
r_serv_onion.hset(key, new_hash, new_value)
|
||||
|
||||
list_data = r_serv_onion.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000)
|
||||
|
||||
# update crawler queue
|
||||
for elem in r_serv_onion.smembers('onion_crawler_queue'):
|
||||
if PASTES_FOLDER in elem:
|
||||
r_serv_onion.srem('onion_crawler_queue', elem)
|
||||
r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1))
|
||||
index = index +1
|
||||
|
||||
for elem in r_serv_onion.smembers('onion_crawler_priority_queue'):
|
||||
if PASTES_FOLDER in elem:
|
||||
r_serv_onion.srem('onion_crawler_queue', elem)
|
||||
r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1))
|
||||
index = index +1
|
||||
|
||||
end = time.time()
|
||||
print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
|
||||
print()
|
||||
|
||||
#Set current ail version
|
||||
r_serv.set('ail:version', 'v1.5')
|
||||
|
||||
#Set current update_in_progress
|
||||
r_serv.set('ail:update_in_progress', 'v1.5')
|
||||
r_serv.set('ail:current_background_update', 'v1.5')
|
||||
|
||||
#Set current ail version
|
||||
r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d"))
|
||||
|
||||
print('Done in {} s'.format(end - start_deb))
|
||||
|
|
|
@ -12,31 +12,48 @@ export PATH=$AIL_ARDB:$PATH
|
|||
export PATH=$AIL_BIN:$PATH
|
||||
export PATH=$AIL_FLASK:$PATH
|
||||
|
||||
echo "Killing all screens ..."
|
||||
bash -c "bash ${AIL_BIN}/LAUNCH.sh -k"
|
||||
echo ""
|
||||
echo "Starting ARDB ..."
|
||||
bash -c "bash ${AIL_BIN}/launch_ardb.sh"
|
||||
GREEN="\\033[1;32m"
|
||||
DEFAULT="\\033[0;39m"
|
||||
|
||||
flag_ardb=true
|
||||
while $flag_ardb; do
|
||||
sleep 1
|
||||
bash -c "bash ${AIL_BIN}/check_ardb.sh"
|
||||
if [ $? == 0 ]; then
|
||||
flag_ardb=false
|
||||
else
|
||||
echo "ARDB not available, waiting 5s before retry"
|
||||
sleep 5
|
||||
fi
|
||||
done
|
||||
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
|
||||
bash ${AIL_BIN}/LAUNCH.sh -k &
|
||||
wait
|
||||
|
||||
echo ""
|
||||
echo "Fixing ARDB ..."
|
||||
echo ""
|
||||
bash -c "python ${AIL_HOME}/update/v1.5/Update.py"
|
||||
bash -c "bash ${AIL_HOME}/update/bin/Update_Redis.sh"
|
||||
#bash -c "bash ${AIL_HOME}/update/bin/Update_ARDB.sh"
|
||||
|
||||
echo "Shutting down ARDB ..."
|
||||
bash -c "bash ${AIL_BIN}/LAUNCH.sh -k"
|
||||
echo ""
|
||||
echo -e $GREEN"Update DomainClassifier"$DEFAULT
|
||||
echo ""
|
||||
pip3 install --upgrade --force-reinstall git+https://github.com/D4-project/BGP-Ranking.git/@28013297efb039d2ebbce96ee2d89493f6ae56b0#subdirectory=client&egg=pybgpranking
|
||||
pip3 install --upgrade --force-reinstall git+https://github.com/adulau/DomainClassifier.git
|
||||
wait
|
||||
echo ""
|
||||
|
||||
echo ""
|
||||
echo -e $GREEN"Update Web thirdparty"$DEFAULT
|
||||
echo ""
|
||||
bash ${AIL_FLASK}update_thirdparty.sh &
|
||||
wait
|
||||
echo ""
|
||||
|
||||
bash ${AIL_BIN}LAUNCH.sh -lav &
|
||||
wait
|
||||
echo ""
|
||||
|
||||
echo ""
|
||||
echo -e $GREEN"Fixing ARDB ..."$DEFAULT
|
||||
echo ""
|
||||
python ${AIL_HOME}/update/v1.4/Update.py &
|
||||
wait
|
||||
echo ""
|
||||
echo ""
|
||||
|
||||
echo ""
|
||||
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
|
||||
bash ${AIL_BIN}/LAUNCH.sh -k &
|
||||
wait
|
||||
|
||||
echo ""
|
||||
|
||||
|
|
Loading…
Reference in New Issue