From e83174327a63a62e642e2e89e4181bd98876dd0f Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 10 Apr 2019 17:47:40 +0200 Subject: [PATCH] chg: [DB] remove absolute path --- OVERVIEW.md | 33 +++----------------- update/v1.4/Update-ARDB_Metadata.py | 4 ++- update/v1.4/Update-ARDB_Onions.py | 16 +++++----- update/v1.4/Update-ARDB_Tags.py | 26 +++++++++++++++ var/www/modules/showpaste/Flask_showpaste.py | 15 +++++++-- 5 files changed, 55 insertions(+), 39 deletions(-) diff --git a/OVERVIEW.md b/OVERVIEW.md index dc2ce694..37cf3adc 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -119,9 +119,7 @@ Redis and ARDB overview ``` ARDB overview ---------------------------- -ARDB_DB -* DB 1 - Curve + * DB 2 - TermFreq ----------------------------------------- TERM ---------------------------------------- @@ -141,30 +139,23 @@ ARDB_DB SET - 'TrackedNotifications' term/set -* DB 3 - Trending * DB 4 - Sentiment ----------------------------------------- SENTIMENT ------------------------------------ SET - 'Provider_set' Provider - + KEY - 'UniqID' INT SET - provider_timestamp UniqID SET - UniqID avg_score -* DB 5 - TermCred -* DB 6 - Tags - ---------------------------------------------------------------------------------------- - - SET - tag paste* + DB 5 - TermCred + - ---------------------------------------------------------------------------------------- * DB 7 - Metadata: - ---------------------------------------------------------------------------------------- - - SET - 'tag:' + paste tag + ---------------------------------------------------------------------------------------- ----------------------------------------- BASE64 ---------------------------------------- @@ -186,26 +177,12 @@ ARDB_DB SET - 'hash_base64_all_type' hash_type * SET - 'hash_binary_all_type' hash_type * - SET - 'hash_paste:'+paste hash * - SET - 'base64_paste:'+paste hash * - SET - 'binary_paste:'+paste hash * - ZADD - 'hash_date:'+20180622 hash * nb_seen_this_day ZADD - 'base64_date:'+20180622 hash * nb_seen_this_day ZADD - 'binary_date:'+20180622 hash * nb_seen_this_day - ZADD - 'nb_seen_hash:'+hash paste * nb_seen_in_paste - ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste - ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste - ZADD - 'base64_type:'+type date nb_seen ZADD - 'binary_type:'+type date nb_seen GET - 'base64_decoded:'+date nd_decoded GET - 'binary_decoded:'+date nd_decoded - -* DB 8 - Statistics -* DB 9 - Onion: - ---------------------------------------------------------------------------------------- - - diff --git a/update/v1.4/Update-ARDB_Metadata.py b/update/v1.4/Update-ARDB_Metadata.py index 7f7951ae..9b131ee2 100755 --- a/update/v1.4/Update-ARDB_Metadata.py +++ b/update/v1.4/Update-ARDB_Metadata.py @@ -10,8 +10,10 @@ import configparser def update_hash_item(has_type): #get all hash items: - all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type)) + #all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type)) + all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\":20180925'.format(has_type)) for item_path in all_hash_items: + item_path = '/home/aurelien/git/python3/AIL-framework/PASTES/archive/pastebin.com_pro/2018/09/25/Fu9akJaz.gz' if PASTES_FOLDER in item_path: base64_key = '{}_paste:{}'.format(has_type, item_path) hash_key = 'hash_paste:{}'.format(item_path) diff --git a/update/v1.4/Update-ARDB_Onions.py b/update/v1.4/Update-ARDB_Onions.py index f88cc3a4..10709069 100755 --- a/update/v1.4/Update-ARDB_Onions.py +++ b/update/v1.4/Update-ARDB_Onions.py @@ -87,8 +87,8 @@ if __name__ == '__main__': for date_history in all_onion_history: pass #print('onion_history:{}:{}'.format(onion_domain, date_history)) - #r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history)) - #r_serv_onion.delete('onion_history:{}'.format(onion_domain)) + r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history)) + r_serv_onion.delete('onion_history:{}'.format(onion_domain)) # clean up domain all_domain_up = r_serv_onion.smembers('full_onion_up') @@ -105,19 +105,19 @@ if __name__ == '__main__': item_father = item_father[0] #print(item_father) # delete old history - #r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history)) + r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history)) # create new history root_key = get_domain_root_from_paste_childrens(item_father, onion_domain) if root_key: - #r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key) + r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key) print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key)) #update service metadata: paste_parent - #r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key) + r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key) - #r_serv_onion.delete('onion_history:{}'.format(onion_domain)) + r_serv_onion.delete('onion_history:{}'.format(onion_domain)) - #r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80') - #r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen') + r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80') + r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen') ''' diff --git a/update/v1.4/Update-ARDB_Tags.py b/update/v1.4/Update-ARDB_Tags.py index 4327b9d0..7044f7c4 100755 --- a/update/v1.4/Update-ARDB_Tags.py +++ b/update/v1.4/Update-ARDB_Tags.py @@ -7,6 +7,14 @@ import time import redis import configparser +def tags_key_fusion(old_item_path_key, new_item_path_key): + print('fusion:') + print(old_item_path_key) + print(new_item_path_key) + for tag in r_serv_metadata.smembers(old_item_path_key): + r_serv_metadata.sadd(new_item_path_key, tag) + r_serv_metadata.srem(old_item_path_key, tag) + if __name__ == '__main__': start_deb = time.time() @@ -115,6 +123,24 @@ if __name__ == '__main__': r_important_paste_2018.flushdb() r_important_paste_2019.flushdb() + #update item metadata tags + tag_not_updated = True + total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') + nb_updated = 0 + while tag_not_updated: + item_path = r_serv_tag.spop('maj:v1.5:absolute_path_to_rename') + old_tag_item_key = 'tag:{}'.format(item_path) + new_item_path = item_path.replace(PASTES_FOLDER, '', 1) + new_tag_item_key = 'tag:{}'.format(new_item_path) + res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key) + if res == 0: + tags_key_fusion(old_tag_item_key, new_tag_item_key) + nb_updated += 1 + if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0: + tag_not_updated = false + else: + print('{}/{} Tags updated'.format(nb_updated, total_to_update)) + end = time.time() diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index 2269dffc..2d856a96 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -153,8 +153,19 @@ def showpaste(content_range, requested_path): if r_serv_metadata.scard('hash_paste:'+requested_path) > 0: set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path) for hash in set_b64: - print(requested_path) - nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)) + nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path) + # item list not updated + if nb_in_file is None: + l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1) + for paste in l_pastes: + # dynamic update + if PASTES_FOLDER in paste: + score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste) + r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste) + paste = paste.replace(PASTES_FOLDER, '', 1) + r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste) + nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path) + nb_in_file = int(nb_in_file) estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') file_type = estimated_type.split('/')[0] # set file icon