chg: [import er url_extract] add item twitter parent

pull/497/head
Terrtia 2020-04-30 15:46:38 +02:00
parent eddeb0b3b0
commit d269a5b31b
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
9 changed files with 52 additions and 10 deletions

View File

@ -35,7 +35,7 @@ class Ail_feeder_twitter(Default_json):
return os.path.join('twitter', item_date, item_id) + '.gz'
# # TODO:
def process_json_meta(self, process):
def process_json_meta(self, process, item_id):
'''
Process JSON meta field.
'''

View File

@ -12,8 +12,8 @@ import json
import sys
import datetime
# sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
# import item_basic
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import item_basic
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer'))
from Default_json import Default_json
@ -39,8 +39,16 @@ class Ail_feeder_twitter(Default_json):
return os.path.join('urlextract', item_date, item_id) + '.gz'
# # TODO:
def process_json_meta(self, process, item_id):
    '''
    Process JSON meta field.

    Reads the 'parent:twitter:tweet_id' entry of the JSON meta and, when
    present, registers the corresponding twitter item as the parent of
    ``item_id`` through ``item_basic.add_item_parent``.

    :param process: processing context handed over by the importer
        (not used by this implementation)
    :param item_id: id of the item that was just imported
    :return: None
    '''
    json_meta = self.get_json_meta()
    tweet_id = json_meta.get('parent:twitter:tweet_id')
    if tweet_id is None:
        # No parent reference in the meta: nothing to link.
        return None
    # # TODO: change me
    # The parent path is built with today's date; presumably the tweet was
    # imported the same day -- confirm before relying on it.
    parent_date = datetime.date.today().strftime("%Y/%m/%d")
    # str(): a numeric tweet id would make os.path.join raise a TypeError.
    item_parent = os.path.join('twitter', parent_date, str(tweet_id)) + '.gz'
    item_basic.add_item_parent(item_parent, item_id)
    return None

View File

@ -44,6 +44,9 @@ class Default_json(object):
'''
return self.json_item
def get_json_meta(self):
    '''
    Return the 'meta' entry of the JSON item held by this importer.
    '''
    meta = self.json_item['meta']
    return meta
def get_feeder_uuid(self):
    '''
    Placeholder: no feeder uuid is provided yet, always returns None.
    '''
    return None
@ -62,7 +65,7 @@ class Default_json(object):
return os.path.join(self.get_feeder_name(), item_date, str(uuid.uuid4())) + '.gz'
## OVERWRITE ME ##
def process_json_meta(self, process):
def process_json_meta(self, process, item_id):
'''
Process JSON meta field.
'''

View File

@ -81,7 +81,7 @@ def get_json_source(json_item):
def process_json(importer_obj, process):
    '''
    Run the importer's meta hook (if any) and queue the item.

    :param importer_obj: importer exposing get_item_id(), get_json_file(),
        process_json_meta(), get_item_gzip64encoded_content() and
        get_feeder_name()
    :param process: processing context forwarded to the meta hook and to
        send_item_to_ail_queue
    '''
    item_id = importer_obj.get_item_id()
    # The meta hook is called exactly once, with the item id it needs in
    # order to attach metadata to the item.
    if 'meta' in importer_obj.get_json_file():
        importer_obj.process_json_meta(process, item_id)

    # send data to queue
    send_item_to_ail_queue(item_id, importer_obj.get_item_gzip64encoded_content(), importer_obj.get_feeder_name(), process)

View File

@ -23,3 +23,11 @@ def exist_item(item_id):
def get_item_filepath(item_id):
    '''
    Return the real path of ``item_id`` joined onto PASTES_FOLDER.
    '''
    return os.path.realpath(os.path.join(PASTES_FOLDER, item_id))
def add_item_parent(item_parent, item_id):
    '''
    Link ``item_id`` to ``item_parent`` in the metadata store.

    When the parent exists, the child's 'father' field is set and the child
    is added to the parent's children set.

    :return: True when the link was stored, False when the parent item
        does not exist
    '''
    if exist_item(item_parent):
        r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', item_parent)
        r_serv_metadata.sadd('paste_children:{}'.format(item_parent), item_id)
        return True
    return False

View File

@ -284,9 +284,15 @@ def get_domain(item_id):
item_id = item_id[-1]
return item_id[:-36]
def get_item_parent(item_id):
    '''
    Return the 'father' field stored under 'paste_metadata:<item_id>'.
    '''
    metadata_key = 'paste_metadata:{}'.format(item_id)
    return r_serv_metadata.hget(metadata_key, 'father')
def get_item_children(item_id):
    '''
    Return, as a list, the members of the set 'paste_children:<item_id>'.
    '''
    children_key = 'paste_children:{}'.format(item_id)
    children = r_serv_metadata.smembers(children_key)
    return list(children)
def add_item_parent(item_parent, item_id):
    '''
    Delegate to item_basic.add_item_parent and return its result.
    '''
    is_linked = item_basic.add_item_parent(item_parent, item_id)
    return is_linked
def get_item_link(item_id):
    '''
    Return the 'real_link' field stored under 'paste_metadata:<item_id>'.
    '''
    metadata_key = 'paste_metadata:{}'.format(item_id)
    return r_serv_metadata.hget(metadata_key, 'real_link')
@ -306,7 +312,6 @@ def get_item_har_name(item_id):
def get_item_har(har_path):
    '''
    Placeholder: not implemented yet, always returns None.
    '''
    return None
def get_item_filename(item_id):
# Creating the full filepath
filename = os.path.join(PASTES_FOLDER, item_id)

View File

@ -22,6 +22,7 @@ import requests
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Tag
import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import Domain
@ -229,6 +230,8 @@ def showpaste(content_range, requested_path):
else:
crawler_metadata['get_metadata'] = False
item_parent = Item.get_item_parent(requested_path)
if Flask_config.pymisp is False:
misp = False
else:
@ -256,7 +259,7 @@ def showpaste(content_range, requested_path):
hive_url = hive_case_url.replace('id_here', hive_case)
return render_template("show_saved_paste.html", date=p_date, bootstrap_label=bootstrap_label, active_taxonomies=active_taxonomies, active_galaxies=active_galaxies, list_tags=list_tags, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list,
crawler_metadata=crawler_metadata, tags_safe=tags_safe,
crawler_metadata=crawler_metadata, tags_safe=tags_safe, item_parent=item_parent,
l_64=l_64, vt_enabled=vt_enabled, misp=misp, hive=hive, misp_eventid=misp_eventid, misp_url=misp_url, hive_caseid=hive_caseid, hive_url=hive_url)
def get_item_basic_info(item):
@ -373,6 +376,8 @@ def show_item_min(requested_path , content_range=0):
else:
crawler_metadata['get_metadata'] = False
item_parent = Item.get_item_parent(requested_path)
misp_event = r_serv_metadata.get('misp_events:' + requested_path)
if misp_event is None:
misp_eventid = False
@ -391,6 +396,7 @@ def show_item_min(requested_path , content_range=0):
return render_template("show_saved_item_min.html", bootstrap_label=bootstrap_label, content=item_content,
item_basic_info=item_basic_info, item_info=item_info,
item_parent=item_parent,
initsize=len(item_content),
hashtype_list = p_hashtype_list,
crawler_metadata=crawler_metadata,

View File

@ -71,6 +71,12 @@
</div>
</div>
{# Link to the parent item; rendered only when a parent is set. #}
{% if item_parent %}
<div class="list-group" id="item_parent">
{# Passing paste= to url_for lets Flask URL-encode the item path. #}
<a href="{{ url_for('showsavedpastes.showsavedpaste', paste=item_parent) }}" target="_blank" rel="noopener noreferrer">{{ item_parent }}</a>
</div>
{% endif %}
{% if misp_eventid %}
<div class="list-group" id="misp_event">
<li class="list-group-item active">MISP Events already Created</li>

View File

@ -198,6 +198,12 @@
</tbody>
</table>
{# Link to the parent item; rendered only when a parent is set. #}
{% if item_parent %}
<div>
{# Passing paste= to url_for lets Flask URL-encode the item path. #}
<a href="{{ url_for('showsavedpastes.showsavedpaste', paste=item_parent) }}" target="_blank" rel="noopener noreferrer">{{ item_parent }}</a>
</div>
{% endif %}
<div>
{% with obj_type='item', obj_id=request.args.get('paste'), obj_lvl=0%}
{% include 'import_export/block_add_user_object_to_export.html' %}