mirror of https://github.com/CIRCL/AIL-framework
chg: [importer] add map twitter id - item id, add parents link between twitter and url extracted
parent
1c412c9fed
commit
074ada504a
|
@ -12,8 +12,8 @@ import json
|
||||||
import sys
|
import sys
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
# sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||||
# import item_basic
|
import item_basic
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer'))
|
||||||
from Default_json import Default_json
|
from Default_json import Default_json
|
||||||
|
@ -39,4 +39,6 @@ class Ail_feeder_twitter(Default_json):
|
||||||
'''
|
'''
|
||||||
Process JSON meta field.
|
Process JSON meta field.
|
||||||
'''
|
'''
|
||||||
|
twitter_id = str(self.json_item['meta']['twitter:tweet_id'])
|
||||||
|
item_basic.add_map_obj_id_item_id(twitter_id, item_id, 'twitter_id')
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -53,9 +53,7 @@ class Ail_feeder_urlextract(Default_json):
|
||||||
json_meta = self.get_json_meta()
|
json_meta = self.get_json_meta()
|
||||||
|
|
||||||
# # TODO: change me
|
# # TODO: change me
|
||||||
parent_type = 'twitter'
|
parent_type = 'twitter_id'
|
||||||
item_parent = str(json_meta['parent:twitter:tweet_id'])
|
|
||||||
parent_date = datetime.date.today().strftime("%Y/%m/%d")
|
|
||||||
item_parent = os.path.join('twitter', parent_date, item_parent) + '.gz'
|
|
||||||
|
|
||||||
item_basic.add_item_parent(item_parent, item_id)
|
parent_id = str(json_meta['parent:twitter:tweet_id'])
|
||||||
|
item_basic.add_item_parent_by_parent_id(parent_type, parent_id, item_id)
|
||||||
|
|
|
@ -26,10 +26,26 @@ def get_item_filepath(item_id):
|
||||||
filename = os.path.join(PASTES_FOLDER, item_id)
|
filename = os.path.join(PASTES_FOLDER, item_id)
|
||||||
return os.path.realpath(filename)
|
return os.path.realpath(filename)
|
||||||
|
|
||||||
def add_item_parent(item_parent, item_id):
|
def add_item_parent_by_parent_id(parent_type, parent_id, item_id):
|
||||||
if not exist_item(item_parent):
|
parent_item_id = get_obj_id_item_id(parent_type, parent_id)
|
||||||
|
if parent_item_id:
|
||||||
|
add_item_parent(item_parent, item_id)
|
||||||
|
|
||||||
|
def add_item_parent(parent_item_id, item_id):
|
||||||
|
if not exist_item(parent_item_id):
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', item_parent)
|
r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', parent_item_id)
|
||||||
r_serv_metadata.sadd('paste_children:{}'.format(item_parent), item_id)
|
r_serv_metadata.sadd('paste_children:{}'.format(parent_item_id), item_id)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def add_map_obj_id_item_id(obj_id, item_id, obj_type):
|
||||||
|
if obj_type == 'twitter_id':
|
||||||
|
r_serv_metadata.hset('map:twitter_id:item_id', obj_id, item_id)
|
||||||
|
|
||||||
|
def get_obj_id_item_id(parent_type, parent_id):
|
||||||
|
all_parents_type = ['twitter_id']
|
||||||
|
if parent_type in all_parents_type:
|
||||||
|
return r_serv_metadata.hget('map:twitter_id:item_id', parent_id)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
|
@ -109,7 +109,7 @@ $AIL_HOME/doc/generate_modules_data_flow_graph.sh
|
||||||
# init update version
|
# init update version
|
||||||
pushd ${AIL_HOME}
|
pushd ${AIL_HOME}
|
||||||
# shallow clone
|
# shallow clone
|
||||||
git fetch --tags --prune
|
git fetch --tags --prune --unshallow
|
||||||
git describe --abbrev=0 --tags | tr -d '\n' > ${AIL_HOME}/update/current_version
|
git describe --abbrev=0 --tags | tr -d '\n' > ${AIL_HOME}/update/current_version
|
||||||
echo "AIL current version:"
|
echo "AIL current version:"
|
||||||
git describe --abbrev=0 --tags
|
git describe --abbrev=0 --tags
|
||||||
|
|
Loading…
Reference in New Issue