mirror of https://github.com/CIRCL/AIL-framework

chg: [DB] remove absolute path

parent d44acea04d
commit e83174327a

OVERVIEW.md (33 changed lines)
OVERVIEW.md, ARDB overview section (the hunks touched by this commit):

```
@@ -119,9 +119,7 @@ Redis and ARDB overview

ARDB overview
---------------------------
ARDB_DB
* DB 1 - Curve
* DB 2 - TermFreq
----------------------------------------- TERM ----------------------------------------

@@ -141,30 +139,23 @@ ARDB_DB

SET - 'TrackedNotifications' term/set

* DB 3 - Trending
* DB 4 - Sentiment
----------------------------------------- SENTIMENT ------------------------------------

SET - 'Provider_set' Provider

KEY - 'UniqID' INT

SET - provider_timestamp UniqID

SET - UniqID avg_score

* DB 5 - TermCred
* DB 6 - Tags
----------------------------------------------------------------------------------------

SET - tag paste*

----------------------------------------------------------------------------------------

* DB 7 - Metadata:
----------------------------------------------------------------------------------------

SET - 'tag:' + paste tag

----------------------------------------------------------------------------------------
----------------------------------------- BASE64 ----------------------------------------

@@ -186,26 +177,12 @@ ARDB_DB

SET - 'hash_base64_all_type' hash_type *
SET - 'hash_binary_all_type' hash_type *

SET - 'hash_paste:'+paste hash *
SET - 'base64_paste:'+paste hash *
SET - 'binary_paste:'+paste hash *

ZADD - 'hash_date:'+20180622 hash * nb_seen_this_day
ZADD - 'base64_date:'+20180622 hash * nb_seen_this_day
ZADD - 'binary_date:'+20180622 hash * nb_seen_this_day

ZADD - 'nb_seen_hash:'+hash paste * nb_seen_in_paste
ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste
ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste

ZADD - 'base64_type:'+type date nb_seen
ZADD - 'binary_type:'+type date nb_seen

GET - 'base64_decoded:'+date nd_decoded
GET - 'binary_decoded:'+date nd_decoded

* DB 8 - Statistics
* DB 9 - Onion:
----------------------------------------------------------------------------------------
```
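The Metadata DB (DB 7) keys listed above tie decoded hashes to the items they were found in. As a rough illustration only, reading those keys with redis-py could look like the sketch below; the endpoint, DB number mapping, and example item path are assumptions, not part of this commit.

```python
import redis

# Assumed ARDB endpoint for the "Metadata" DB (DB 7 above); adjust to the
# values configured in the local AIL install.
r_metadata = redis.StrictRedis(host='localhost', port=6382, db=7, decode_responses=True)

# Hypothetical relative item path (PASTES_FOLDER prefix already stripped).
item = 'archive/pastebin.com_pro/2018/09/25/example.gz'

# Hashes decoded from this item ...
for decoded_hash in r_metadata.smembers('hash_paste:{}'.format(item)):
    # ... how many times each hash was seen in the item ...
    nb_seen = r_metadata.zscore('nb_seen_hash:{}'.format(decoded_hash), item)
    # ... and the estimated MIME type recorded for the hash.
    estimated_type = r_metadata.hget('metadata_hash:{}'.format(decoded_hash), 'estimated_type')
    print(decoded_hash, nb_seen, estimated_type)
```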
```diff
@@ -10,8 +10,10 @@ import configparser

 def update_hash_item(has_type):
     #get all hash items:
-    all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
+    #all_base64 = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
+    all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\":20180925'.format(has_type))
     for item_path in all_hash_items:
+        item_path = '/home/aurelien/git/python3/AIL-framework/PASTES/archive/pastebin.com_pro/2018/09/25/Fu9akJaz.gz'
         if PASTES_FOLDER in item_path:
             base64_key = '{}_paste:{}'.format(has_type, item_path)
             hash_key = 'hash_paste:{}'.format(item_path)
```
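The hunk above reworks update_hash_item() to iterate tagged hash items and rebuild the per-item keys; note that the added hard-coded item_path overrides the loop variable on every iteration. The point of the PASTES_FOLDER check is to turn absolute paths into relative ones before they are used in keys. A minimal sketch of that prefix-stripping idea, with a hypothetical install path that is not taken from this commit:

```python
# Hypothetical absolute PASTES folder of an AIL install (illustration only).
PASTES_FOLDER = '/opt/AIL-framework/PASTES/'

def to_relative(item_path):
    """Strip the absolute PASTES_FOLDER prefix so keys store relative paths."""
    if PASTES_FOLDER in item_path:
        return item_path.replace(PASTES_FOLDER, '', 1)
    return item_path

abs_path = '/opt/AIL-framework/PASTES/archive/pastebin.com_pro/2018/09/25/example.gz'
print('hash_paste:{}'.format(abs_path))               # old, absolute-path key
print('hash_paste:{}'.format(to_relative(abs_path)))  # new, relative-path key
```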
```diff
@@ -87,8 +87,8 @@ if __name__ == '__main__':
         for date_history in all_onion_history:
             pass
             #print('onion_history:{}:{}'.format(onion_domain, date_history))
-            #r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
+            r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
-        #r_serv_onion.delete('onion_history:{}'.format(onion_domain))
+        r_serv_onion.delete('onion_history:{}'.format(onion_domain))

     # clean up domain
     all_domain_up = r_serv_onion.smembers('full_onion_up')
@@ -105,19 +105,19 @@ if __name__ == '__main__':
             item_father = item_father[0]
             #print(item_father)
             # delete old history
-            #r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
+            r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
             # create new history
             root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
             if root_key:
-                #r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key)
+                r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key)
                 print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key))
                 #update service metadata: paste_parent
-                #r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)
+                r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)

-        #r_serv_onion.delete('onion_history:{}'.format(onion_domain))
+        r_serv_onion.delete('onion_history:{}'.format(onion_domain))

-        #r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
+        r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
-        #r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')
+        r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')


     '''
```
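The rewritten onion history entries are stored in sorted sets keyed by domain and port, scored with an epoch timestamp produced by get_date_epoch(), a helper not shown in this diff. A hedged sketch of what such a helper is assumed to do (the repository's implementation may differ, for example by using local time instead of UTC):

```python
import datetime

def get_date_epoch(date_str):
    # Convert an AIL date string 'YYYYMMDD' into a Unix timestamp usable as a
    # sorted-set score, e.g. '20180925' -> 1537833600.0 (UTC midnight).
    return datetime.datetime.strptime(date_str, '%Y%m%d').replace(
        tzinfo=datetime.timezone.utc).timestamp()
```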
```diff
@@ -7,6 +7,14 @@ import time
 import redis
 import configparser

+def tags_key_fusion(old_item_path_key, new_item_path_key):
+    print('fusion:')
+    print(old_item_path_key)
+    print(new_item_path_key)
+    for tag in r_serv_metadata.smembers(old_item_path_key):
+        r_serv_metadata.sadd(new_item_path_key, tag)
+        r_serv_metadata.srem(old_item_path_key, tag)
+
 if __name__ == '__main__':

     start_deb = time.time()
@@ -115,6 +123,24 @@ if __name__ == '__main__':
     r_important_paste_2018.flushdb()
     r_important_paste_2019.flushdb()

+    #update item metadata tags
+    tag_not_updated = True
+    total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename')
+    nb_updated = 0
+    while tag_not_updated:
+        item_path = r_serv_tag.spop('maj:v1.5:absolute_path_to_rename')
+        old_tag_item_key = 'tag:{}'.format(item_path)
+        new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
+        new_tag_item_key = 'tag:{}'.format(new_item_path)
+        res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key)
+        if res == 0:
+            tags_key_fusion(old_tag_item_key, new_tag_item_key)
+        nb_updated += 1
+        if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0:
+            tag_not_updated = False
+        else:
+            print('{}/{} Tags updated'.format(nb_updated, total_to_update))
+
     end = time.time()
```
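tags_key_fusion() exists because Redis RENAMENX refuses to overwrite an existing destination: renamenx() returns 0 when the relative-path 'tag:' key is already present, and in that case the members of the old set have to be merged in by hand. A minimal standalone illustration with hypothetical keys and an assumed connection, not code from this commit:

```python
import redis

# Assumed metadata ARDB/Redis endpoint (illustration only).
r_serv_metadata = redis.StrictRedis(host='localhost', port=6382, db=7, decode_responses=True)

old_key = 'tag:/home/ail/PASTES/archive/example.gz'   # hypothetical absolute-path key
new_key = 'tag:archive/example.gz'                    # hypothetical relative-path key

# RENAMENX raises an error if the source is missing, hence the exists() guard.
if r_serv_metadata.exists(old_key) and r_serv_metadata.renamenx(old_key, new_key) == 0:
    # Destination already exists: merge the tag sets member by member.
    for tag in r_serv_metadata.smembers(old_key):
        r_serv_metadata.sadd(new_key, tag)
        r_serv_metadata.srem(old_key, tag)
```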
```diff
@@ -153,8 +153,19 @@ def showpaste(content_range, requested_path):
     if r_serv_metadata.scard('hash_paste:'+requested_path) > 0:
         set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path)
         for hash in set_b64:
-            print(requested_path)
-            nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path))
+            nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
+            # item list not updated
+            if nb_in_file is None:
+                l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
+                for paste in l_pastes:
+                    # dynamic update
+                    if PASTES_FOLDER in paste:
+                        score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste)
+                        r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste)
+                        paste = paste.replace(PASTES_FOLDER, '', 1)
+                        r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste)
+                nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
+            nb_in_file = int(nb_in_file)
             estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
             file_type = estimated_type.split('/')[0]
             # set file icon
```
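The zadd() calls in these hunks pass the score and member positionally (zadd(key, score, member)), which matches the redis-py 2.x API; it is an assumption here that the client pinned by AIL at the time was still on that major version. With redis-py 3.0 and later the same write takes a mapping of member to score instead, as in this illustration with hypothetical values:

```python
import redis

# Illustration only: assumed local Redis/ARDB endpoint and hypothetical values.
r_serv_metadata = redis.StrictRedis(host='localhost', port=6382, db=7, decode_responses=True)
decoded_hash = 'b6589fc6ab0dc82cf12099d1c2d40ab994e8410c'   # hypothetical SHA1
item_path = 'archive/pastebin.com_pro/2018/09/25/example.gz'
score = 4

# redis-py 2.x style, as used in the diff above:
#   r_serv_metadata.zadd('nb_seen_hash:{}'.format(decoded_hash), score, item_path)
# redis-py >= 3.0 equivalent:
r_serv_metadata.zadd('nb_seen_hash:{}'.format(decoded_hash), {item_path: score})
```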