From 3cc614a1adb5e0943524f4a4b6fe46ac7823198e Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 11 Apr 2019 11:58:06 +0200 Subject: [PATCH] chg: [DB] update items tags metadata --- bin/packages/HiddenServices.py | 24 +++++-- update/v1.4/Update-ARDB_Metadata.py | 14 ++-- update/v1.4/Update-ARDB_Tags.py | 24 ------- update/v1.4/Update-ARDB_Tags_background.py | 70 +++++++++++++++++++ .../hiddenServices/Flask_hiddenServices.py | 7 +- .../hiddenServices/templates/domains.html | 2 +- 6 files changed, 99 insertions(+), 42 deletions(-) create mode 100755 update/v1.4/Update-ARDB_Tags_background.py diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py index 2361f60f..39fd4427 100755 --- a/bin/packages/HiddenServices.py +++ b/bin/packages/HiddenServices.py @@ -59,6 +59,8 @@ class HiddenServices(object): db=cfg.getint("ARDB_Metadata", "db"), decode_responses=True) + self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' + self.domain = domain self.type = type self.port = port @@ -76,9 +78,16 @@ class HiddenServices(object): ## TODO: # FIXME: add error pass - def remove_absolute_path_link(self, key, value): - print(key) - print(value) + #def remove_absolute_path_link(self, key, value): + # print(key) + # print(value) + + def update_item_path_children(self, key, children): + if self.PASTES_FOLDER in children: + self.r_serv_metadata.srem(key, children) + children = children.replace(self.PASTES_FOLDER, '', 1) + self.r_serv_metadata.sadd(key, children) + return children def get_origin_paste_name(self): origin_item = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'paste_parent') @@ -106,7 +115,6 @@ class HiddenServices(object): # need to remove it else: p_tags = self.r_serv_metadata.smembers('tag:{}'.format(os.path.join(self.paste_directory, item))) - print(p_tags) for tag in p_tags: self.tags[tag] = self.tags.get(tag, 0) + 1 @@ -158,8 +166,10 @@ class HiddenServices(object): if father is None: return [] l_crawled_pastes = [] - paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(father)) + key = 'paste_children:{}'.format(father) + paste_childrens = self.r_serv_metadata.smembers(key) for children in paste_childrens: + children = self.update_item_path_children(key, children) if self.domain in children: l_crawled_pastes.append(children) self.update_domain_tags(children) @@ -174,8 +184,8 @@ class HiddenServices(object): else: key = os.path.join(self.paste_directory, item) link = self.r_serv_metadata.hget('paste_metadata:{}'.format(key), 'real_link') - if link: - self.remove_absolute_path_link(key, link) + #if link: + #self.remove_absolute_path_link(key, link) return link diff --git a/update/v1.4/Update-ARDB_Metadata.py b/update/v1.4/Update-ARDB_Metadata.py index 9b131ee2..5573cede 100755 --- a/update/v1.4/Update-ARDB_Metadata.py +++ b/update/v1.4/Update-ARDB_Metadata.py @@ -85,19 +85,19 @@ if __name__ == '__main__': item_path = item_path.replace(PASTES_FOLDER, '', 1) new_item_metadata = 'paste_metadata:{}'.format(item_path) ## TODO: catch error - r_serv_metadata.rename(old_item_metadata, new_item_metadata) + res = r_serv_metadata.renamenx(old_item_metadata, new_item_metadata) # update domain port - domain = r_serv_metadata.hget('paste_metadata:{}'.format(item_path), 'domain') + domain = r_serv_metadata.hget(new_item_metadata, 'domain') if domain: - r_serv_metadata.hset('paste_metadata:{}'.format(item_path), 'domain', '{}:80'.format(domain)) - super_father = r_serv_metadata.hget('paste_metadata:{}'.format(item_path), 'super_father') + r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain)) + super_father = r_serv_metadata.hget(new_item_metadata, 'super_father') if super_father: if PASTES_FOLDER in super_father: - r_serv_metadata.hset('paste_metadata:{}'.format(item_path), 'super_father', super_father.replace(PASTES_FOLDER, '', 1)) - father = r_serv_metadata.hget('paste_metadata:{}'.format(item_path), 'father') + r_serv_metadata.hset(new_item_metadata, 'super_father', super_father.replace(PASTES_FOLDER, '', 1)) + father = r_serv_metadata.hget(new_item_metadata, 'father') if father: if PASTES_FOLDER in father: - r_serv_metadata.hset('paste_metadata:{}'.format(item_path), 'father', father.replace(PASTES_FOLDER, '', 1)) + r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1)) diff --git a/update/v1.4/Update-ARDB_Tags.py b/update/v1.4/Update-ARDB_Tags.py index 7044f7c4..fa26e6e0 100755 --- a/update/v1.4/Update-ARDB_Tags.py +++ b/update/v1.4/Update-ARDB_Tags.py @@ -47,12 +47,6 @@ if __name__ == '__main__': db=cfg.getint("ARDB_Onion", "db"), decode_responses=True) - r_serv_onion = redis.StrictRedis( - host=cfg.get("ARDB_Onion", "host"), - port=cfg.getint("ARDB_Onion", "port"), - db=cfg.getint("ARDB_Onion", "db"), - decode_responses=True) - r_important_paste_2018 = redis.StrictRedis( host=cfg.get("ARDB_Metadata", "host"), port=cfg.getint("ARDB_Metadata", "port"), @@ -123,24 +117,6 @@ if __name__ == '__main__': r_important_paste_2018.flushdb() r_important_paste_2019.flushdb() - #update item metadata tags - tag_not_updated = True - total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') - nb_updated = 0 - while tag_not_updated: - item_path = r_serv_tag.spop('maj:v1.5:absolute_path_to_rename') - old_tag_item_key = 'tag:{}'.format(item_path) - new_item_path = item_path.replace(PASTES_FOLDER, '', 1) - new_tag_item_key = 'tag:{}'.format(new_item_path) - res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key) - if res == 0: - tags_key_fusion(old_tag_item_key, new_tag_item_key) - nb_updated += 1 - if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0: - tag_not_updated = false - else: - print('{}/{} Tags updated'.format(nb_updated, total_to_update)) - end = time.time() diff --git a/update/v1.4/Update-ARDB_Tags_background.py b/update/v1.4/Update-ARDB_Tags_background.py new file mode 100755 index 00000000..8333e96c --- /dev/null +++ b/update/v1.4/Update-ARDB_Tags_background.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import time +import redis +import configparser + +def tags_key_fusion(old_item_path_key, new_item_path_key): + print('fusion:') + print(old_item_path_key) + print(new_item_path_key) + for tag in r_serv_metadata.smembers(old_item_path_key): + r_serv_metadata.sadd(new_item_path_key, tag) + r_serv_metadata.srem(old_item_path_key, tag) + +if __name__ == '__main__': + + start_deb = time.time() + + configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + cfg = configparser.ConfigParser() + cfg.read(configfile) + + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' + + r_serv_metadata = redis.StrictRedis( + host=cfg.get("ARDB_Metadata", "host"), + port=cfg.getint("ARDB_Metadata", "port"), + db=cfg.getint("ARDB_Metadata", "db"), + decode_responses=True) + + r_serv_tag = redis.StrictRedis( + host=cfg.get("ARDB_Tags", "host"), + port=cfg.getint("ARDB_Tags", "port"), + db=cfg.getint("ARDB_Tags", "db"), + decode_responses=True) + + print('Updating ARDB_Tags ...') + start = time.time() + + #update item metadata tags + tag_not_updated = True + total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') + nb_updated = 0 + if total_to_update > 0: + while tag_not_updated: + item_path = r_serv_tag.spop('maj:v1.5:absolute_path_to_rename') + old_tag_item_key = 'tag:{}'.format(item_path) + new_item_path = item_path.replace(PASTES_FOLDER, '', 1) + new_tag_item_key = 'tag:{}'.format(new_item_path) + res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key) + if res == 0: + tags_key_fusion(old_tag_item_key, new_tag_item_key) + nb_updated += 1 + if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0: + tag_not_updated = False + else: + progress = int((nb_updated * 100) /total_to_update) + print('{}/{} Tags updated {}%'.format(nb_updated, total_to_update, progress)) + + end = time.time() + + + print('Updating ARDB_Tags Done: {} s'.format(end - start)) diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index bb8ee2b3..b687b7df 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -175,7 +175,6 @@ def get_crawler_splash_status(type): return crawler_metadata def create_crawler_config(mode, service_type, crawler_config, domain): - print(crawler_config) if mode == 'manual': r_cache.set('crawler_config:{}:{}:{}'.format(mode, service_type, domain), json.dumps(crawler_config)) elif mode == 'auto': @@ -559,8 +558,10 @@ def show_domain(): h = HiddenServices(domain, type, port=port) item_core = h.get_domain_crawled_core_item(epoch=epoch) - epoch = item_core['epoch'] - l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item']) + if item_core: + l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item']) + else: + l_pastes = [] dict_links = h.get_all_links(l_pastes) if l_pastes: status = True diff --git a/var/www/modules/hiddenServices/templates/domains.html b/var/www/modules/hiddenServices/templates/domains.html index 136291b1..85f2006a 100644 --- a/var/www/modules/hiddenServices/templates/domains.html +++ b/var/www/modules/hiddenServices/templates/domains.html @@ -63,7 +63,7 @@ {% for domain in domains_by_day[date] %} - {{ domain }} + {{ domain }}
{% for tag in domain_metadata[domain]['tags'] %}