mirror of https://github.com/CIRCL/AIL-framework
chg: [DB] remove absolute path
parent
d44acea04d
commit
e83174327a
27
OVERVIEW.md
27
OVERVIEW.md
|
@ -119,9 +119,7 @@ Redis and ARDB overview
|
|||
```
|
||||
|
||||
ARDB overview
|
||||
---------------------------
|
||||
ARDB_DB
|
||||
* DB 1 - Curve
|
||||
|
||||
* DB 2 - TermFreq
|
||||
----------------------------------------- TERM ----------------------------------------
|
||||
|
||||
|
@ -141,7 +139,6 @@ ARDB_DB
|
|||
|
||||
SET - 'TrackedNotifications' term/set
|
||||
|
||||
* DB 3 - Trending
|
||||
* DB 4 - Sentiment
|
||||
----------------------------------------- SENTIMENT ------------------------------------
|
||||
|
||||
|
@ -153,18 +150,12 @@ ARDB_DB
|
|||
|
||||
SET - UniqID avg_score
|
||||
|
||||
* DB 5 - TermCred
|
||||
* DB 6 - Tags
|
||||
----------------------------------------------------------------------------------------
|
||||
DB 5 - TermCred
|
||||
|
||||
SET - tag paste*
|
||||
|
||||
----------------------------------------------------------------------------------------
|
||||
|
||||
* DB 7 - Metadata:
|
||||
----------------------------------------------------------------------------------------
|
||||
|
||||
SET - 'tag:' + paste tag
|
||||
|
||||
----------------------------------------------------------------------------------------
|
||||
----------------------------------------- BASE64 ----------------------------------------
|
||||
|
@ -186,26 +177,12 @@ ARDB_DB
|
|||
SET - 'hash_base64_all_type' hash_type *
|
||||
SET - 'hash_binary_all_type' hash_type *
|
||||
|
||||
SET - 'hash_paste:'+paste hash *
|
||||
SET - 'base64_paste:'+paste hash *
|
||||
SET - 'binary_paste:'+paste hash *
|
||||
|
||||
ZADD - 'hash_date:'+20180622 hash * nb_seen_this_day
|
||||
ZADD - 'base64_date:'+20180622 hash * nb_seen_this_day
|
||||
ZADD - 'binary_date:'+20180622 hash * nb_seen_this_day
|
||||
|
||||
ZADD - 'nb_seen_hash:'+hash paste * nb_seen_in_paste
|
||||
ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste
|
||||
ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste
|
||||
|
||||
ZADD - 'base64_type:'+type date nb_seen
|
||||
ZADD - 'binary_type:'+type date nb_seen
|
||||
|
||||
GET - 'base64_decoded:'+date nd_decoded
|
||||
GET - 'binary_decoded:'+date nd_decoded
|
||||
|
||||
* DB 8 - Statistics
|
||||
* DB 9 - Onion:
|
||||
----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
|
@ -10,8 +10,10 @@ import configparser
|
|||
|
||||
def update_hash_item(has_type):
|
||||
#get all hash items:
|
||||
all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
|
||||
#all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
|
||||
all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\":20180925'.format(has_type))
|
||||
for item_path in all_hash_items:
|
||||
item_path = '/home/aurelien/git/python3/AIL-framework/PASTES/archive/pastebin.com_pro/2018/09/25/Fu9akJaz.gz'
|
||||
if PASTES_FOLDER in item_path:
|
||||
base64_key = '{}_paste:{}'.format(has_type, item_path)
|
||||
hash_key = 'hash_paste:{}'.format(item_path)
|
||||
|
|
|
@ -87,8 +87,8 @@ if __name__ == '__main__':
|
|||
for date_history in all_onion_history:
|
||||
pass
|
||||
#print('onion_history:{}:{}'.format(onion_domain, date_history))
|
||||
#r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
|
||||
#r_serv_onion.delete('onion_history:{}'.format(onion_domain))
|
||||
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
|
||||
r_serv_onion.delete('onion_history:{}'.format(onion_domain))
|
||||
|
||||
# clean up domain
|
||||
all_domain_up = r_serv_onion.smembers('full_onion_up')
|
||||
|
@ -105,19 +105,19 @@ if __name__ == '__main__':
|
|||
item_father = item_father[0]
|
||||
#print(item_father)
|
||||
# delete old history
|
||||
#r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
|
||||
r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
|
||||
# create new history
|
||||
root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
|
||||
if root_key:
|
||||
#r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key)
|
||||
r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key)
|
||||
print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key))
|
||||
#update service metadata: paste_parent
|
||||
#r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)
|
||||
r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)
|
||||
|
||||
#r_serv_onion.delete('onion_history:{}'.format(onion_domain))
|
||||
r_serv_onion.delete('onion_history:{}'.format(onion_domain))
|
||||
|
||||
#r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
|
||||
#r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')
|
||||
r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
|
||||
r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')
|
||||
|
||||
|
||||
'''
|
||||
|
|
|
@ -7,6 +7,14 @@ import time
|
|||
import redis
|
||||
import configparser
|
||||
|
||||
def tags_key_fusion(old_item_path_key, new_item_path_key):
    """Merge the tag set stored under the old item key into the new item key.

    Every member of the set at ``old_item_path_key`` is added to the set at
    ``new_item_path_key`` and then removed from the old set, one member at a
    time. Both key names are printed first as a trace of the fusion.
    """
    # One print call, newline-separated: same three output lines as before.
    print('fusion:', old_item_path_key, new_item_path_key, sep='\n')

    # The members are fetched once up front, then moved one by one.
    tags_to_move = r_serv_metadata.smembers(old_item_path_key)
    for moved_tag in tags_to_move:
        r_serv_metadata.sadd(new_item_path_key, moved_tag)
        r_serv_metadata.srem(old_item_path_key, moved_tag)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
start_deb = time.time()
|
||||
|
@ -115,6 +123,24 @@ if __name__ == '__main__':
|
|||
r_important_paste_2018.flushdb()
|
||||
r_important_paste_2019.flushdb()
|
||||
|
||||
# Update item metadata tags: every path queued in
# 'maj:v1.5:absolute_path_to_rename' has its 'tag:<path>' key renamed to the
# same key with the first occurrence of PASTES_FOLDER stripped from the path.
total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename')
nb_updated = 0
# Nothing queued means nothing to do; entering the loop anyway would make
# spop return None and crash on None.replace().
tag_not_updated = total_to_update > 0
while tag_not_updated:
    item_path = r_serv_tag.spop('maj:v1.5:absolute_path_to_rename')
    if item_path is None:
        # The set was emptied between the scard check and this spop.
        tag_not_updated = False
        break

    old_tag_item_key = 'tag:{}'.format(item_path)
    new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
    new_tag_item_key = 'tag:{}'.format(new_item_path)

    # renamenx refuses to overwrite an existing destination key; in that
    # case the tags of both keys are merged member by member instead.
    # NOTE(review): Redis reports an error when the source key does not
    # exist -- confirm every queued item actually has a 'tag:' key.
    res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key)
    if res == 0:
        tags_key_fusion(old_tag_item_key, new_tag_item_key)
    nb_updated += 1

    if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0:
        # Was `false`, which is not a Python name and raised NameError.
        tag_not_updated = False
    else:
        print('{}/{} Tags updated'.format(nb_updated, total_to_update))
|
||||
|
||||
end = time.time()
|
||||
|
||||
|
||||
|
|
|
@ -153,8 +153,19 @@ def showpaste(content_range, requested_path):
|
|||
if r_serv_metadata.scard('hash_paste:'+requested_path) > 0:
|
||||
set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path)
|
||||
for hash in set_b64:
|
||||
print(requested_path)
|
||||
nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path))
|
||||
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
|
||||
# item list not updated
|
||||
if nb_in_file is None:
|
||||
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
|
||||
for paste in l_pastes:
|
||||
# dynamic update
|
||||
if PASTES_FOLDER in paste:
|
||||
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste)
|
||||
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste)
|
||||
paste = paste.replace(PASTES_FOLDER, '', 1)
|
||||
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste)
|
||||
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
|
||||
nb_in_file = int(nb_in_file)
|
||||
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
|
||||
file_type = estimated_type.split('/')[0]
|
||||
# set file icon
|
||||
|
|
Loading…
Reference in New Issue