AIL-framework/update/v2.6/Update_screenshots.py

102 lines
3.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import redis
import datetime
from hashlib import sha256
from pyfaup.faup import Faup
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
def get_all_item(screenshot_sha256):
return r_serv_onion.smembers('screenshot:{}'.format(screenshot_sha256))
def sanitize_domain(domain):
faup.decode(domain)
domain_sanitized = faup.get()
2019-12-17 16:26:21 +01:00
domain_sanitized = domain_sanitized['domain']
try:
domain_sanitized = domain_sanitized.decode()
except:
pass
return domain_sanitized.lower()
def update_db(screenshot_sha256):
screenshot_items = get_all_item(screenshot_sha256)
if screenshot_items:
for item_id in screenshot_items:
item_id = item_id.replace(PASTES_FOLDER+'/', '', 1) # remove root path
domain = Item.get_domain(item_id)
domain_sanitized = sanitize_domain(domain)
if domain != domain_sanitized:
r_serv_onion.sadd('incorrect_domain', domain)
domain = domain_sanitized
#print(item_id)
#print(domain)
r_serv_onion.sadd('domain_screenshot:{}'.format(domain), screenshot_sha256)
r_serv_onion.sadd('screenshot_domain:{}'.format(screenshot_sha256), domain)
else:
pass
# broken screenshot
r_serv_onion.sadd('broken_screenshot', screenshot_sha256)
if __name__ == '__main__':
start_deb = time.time()
faup = Faup()
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
r_serv_db.set('ail:update_in_progress', 'v2.6')
r_serv_db.set('ail:current_background_update', 'v2.6')
r_serv_db.set('ail:current_background_script_stat', 20)
r_serv_db.set('ail:current_background_script', 'screenshot update')
nb = 0
if os.path.isdir(SCREENSHOT_FOLDER):
for root, dirs, files in os.walk(SCREENSHOT_FOLDER, topdown=False):
#print(dirs)
for name in files:
nb = nb + 1
screenshot_sha256 = os.path.join(root, name)
screenshot_sha256 = screenshot_sha256[:-4] # remove .png
screenshot_sha256 = screenshot_sha256.replace(SCREENSHOT_FOLDER, '', 1)
screenshot_sha256 = screenshot_sha256.replace('/', '')
update_db(screenshot_sha256)
#print('Screenshot updated: {}'.format(nb))
2019-12-17 15:59:00 +01:00
if nb % 1000 == 0:
r_serv_db.set('ail:current_background_script', 'screenshot updated: {}'.format(nb))
r_serv_db.set('ail:current_background_script_stat', 100)
end = time.time()
print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb))
r_serv_db.delete('ail:update_in_progress')
r_serv_db.delete('ail:current_background_script')
r_serv_db.delete('ail:current_background_script_stat')
r_serv_db.delete('ail:current_background_update')
r_serv_db.srem('ail:to_update', 'v2.6')