mirror of https://github.com/CIRCL/AIL-framework
310 lines
12 KiB
Python
Executable File
310 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*-coding:UTF-8 -*
|
|
|
|
import os
|
|
import sys
|
|
|
|
from datetime import datetime
|
|
|
|
from flask import url_for
|
|
# from pymisp import MISPObject
|
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
##################################
|
|
# Import Project packages
|
|
##################################
|
|
from lib import ail_core
|
|
from lib.ConfigLoader import ConfigLoader
|
|
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
|
|
from lib.data_retention_engine import update_obj_date
|
|
from lib.objects import ail_objects
|
|
from lib.timeline_engine import Timeline
|
|
|
|
from lib.correlations_engine import get_correlation_by_correl_type
|
|
|
|
config_loader = ConfigLoader()
|
|
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
|
r_object = config_loader.get_db_conn("Kvrocks_Objects")
|
|
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
|
config_loader = None
|
|
|
|
|
|
################################################################################
|
|
################################################################################
|
|
################################################################################
|
|
|
|
class Chat(AbstractSubtypeObject): # TODO # ID == username ?????
|
|
"""
|
|
AIL Chat Object. (strings)
|
|
"""
|
|
|
|
def __init__(self, id, subtype):
|
|
super(Chat, self).__init__('chat', id, subtype)
|
|
|
|
# def get_ail_2_ail_payload(self):
|
|
# payload = {'raw': self.get_gzip_content(b64=True),
|
|
# 'compress': 'gzip'}
|
|
# return payload
|
|
|
|
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
|
|
def delete(self):
|
|
# # TODO:
|
|
pass
|
|
|
|
def get_link(self, flask_context=False):
|
|
if flask_context:
|
|
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
|
|
else:
|
|
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
|
|
return url
|
|
|
|
def get_svg_icon(self): # TODO
|
|
# if self.subtype == 'telegram':
|
|
# style = 'fab'
|
|
# icon = '\uf2c6'
|
|
# elif self.subtype == 'discord':
|
|
# style = 'fab'
|
|
# icon = '\uf099'
|
|
# else:
|
|
# style = 'fas'
|
|
# icon = '\uf007'
|
|
style = 'fas'
|
|
icon = '\uf086'
|
|
return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5}
|
|
|
|
def get_meta(self, options=set()):
|
|
meta = self._get_meta(options=options)
|
|
meta['id'] = self.id
|
|
meta['subtype'] = self.subtype
|
|
meta['tags'] = self.get_tags(r_list=True)
|
|
return meta
|
|
|
|
def get_misp_object(self):
|
|
# obj_attrs = []
|
|
# if self.subtype == 'telegram':
|
|
# obj = MISPObject('telegram-account', standalone=True)
|
|
# obj_attrs.append(obj.add_attribute('username', value=self.id))
|
|
#
|
|
# elif self.subtype == 'twitter':
|
|
# obj = MISPObject('twitter-account', standalone=True)
|
|
# obj_attrs.append(obj.add_attribute('name', value=self.id))
|
|
#
|
|
# else:
|
|
# obj = MISPObject('user-account', standalone=True)
|
|
# obj_attrs.append(obj.add_attribute('username', value=self.id))
|
|
#
|
|
# first_seen = self.get_first_seen()
|
|
# last_seen = self.get_last_seen()
|
|
# if first_seen:
|
|
# obj.first_seen = first_seen
|
|
# if last_seen:
|
|
# obj.last_seen = last_seen
|
|
# if not first_seen or not last_seen:
|
|
# self.logger.warning(
|
|
# f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
|
|
#
|
|
# for obj_attr in obj_attrs:
|
|
# for tag in self.get_tags():
|
|
# obj_attr.add_tag(tag)
|
|
# return obj
|
|
return
|
|
|
|
############################################################################
|
|
############################################################################
|
|
|
|
# others optional metas, ... -> # TODO ALL meta in hset
|
|
|
|
def get_name(self): # get username ????
|
|
pass
|
|
|
|
# users that send at least a message else participants/spectator
|
|
# correlation created by messages
|
|
def get_users(self):
|
|
users = set()
|
|
accounts = self.get_correlation('user-account').get('user-account', [])
|
|
for account in accounts:
|
|
users.add(account[1:])
|
|
return users
|
|
|
|
def _get_timeline_username(self):
|
|
return Timeline(self.get_global_id(), 'username')
|
|
|
|
def get_username(self):
|
|
return self._get_timeline_username().get_last_obj_id()
|
|
|
|
def get_usernames(self):
|
|
return self._get_timeline_username().get_objs_ids()
|
|
|
|
def update_username_timeline(self, username_global_id, timestamp):
|
|
self._get_timeline_username().add_timestamp(timestamp, username_global_id)
|
|
|
|
|
|
# def get_last_message_id(self):
|
|
#
|
|
# return r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last:message:id')
|
|
|
|
def get_obj_message_id(self, obj_id):
|
|
if obj_id.endswith('.gz'):
|
|
obj_id = obj_id[:-3]
|
|
return int(obj_id.split('_')[-1])
|
|
|
|
def _get_message_timestamp(self, obj_global_id):
|
|
return r_object.zscore(f'messages:{self.type}:{self.subtype}:{self.id}', obj_global_id)
|
|
|
|
def _get_messages(self):
|
|
return r_object.zrange(f'messages:{self.type}:{self.subtype}:{self.id}', 0, -1, withscores=True)
|
|
|
|
def get_message_meta(self, obj_global_id, parent=True, mess_datetime=None):
|
|
obj = ail_objects.get_obj_from_global_id(obj_global_id)
|
|
mess_dict = obj.get_meta(options={'content', 'link', 'parent', 'user-account'})
|
|
if mess_dict.get('parent') and parent:
|
|
mess_dict['reply_to'] = self.get_message_meta(mess_dict['parent'], parent=False)
|
|
if mess_dict.get('user-account'):
|
|
user_account = ail_objects.get_obj_from_global_id(mess_dict['user-account'])
|
|
mess_dict['user-account'] = {}
|
|
mess_dict['user-account']['type'] = user_account.get_type()
|
|
mess_dict['user-account']['subtype'] = user_account.get_subtype(r_str=True)
|
|
mess_dict['user-account']['id'] = user_account.get_id()
|
|
username = user_account.get_username()
|
|
if username:
|
|
username = ail_objects.get_obj_from_global_id(username).get_default_meta(link=False)
|
|
mess_dict['user-account']['username'] = username # TODO get username at the given timestamp ???
|
|
else:
|
|
mess_dict['user-account']['id'] = 'UNKNOWN'
|
|
|
|
if not mess_datetime:
|
|
obj_mess_id = self._get_message_timestamp(obj_global_id)
|
|
mess_datetime = datetime.fromtimestamp(obj_mess_id)
|
|
mess_dict['date'] = mess_datetime.isoformat(' ')
|
|
mess_dict['hour'] = mess_datetime.strftime('%H:%M:%S')
|
|
return mess_dict
|
|
|
|
|
|
def get_messages(self, start=0, page=1, nb=500): # TODO limit nb returned, # TODO add replies
|
|
start = 0
|
|
stop = -1
|
|
# r_object.delete(f'messages:{self.type}:{self.subtype}:{self.id}')
|
|
|
|
# TODO chat without username ???? -> chat ID ????
|
|
|
|
messages = {}
|
|
curr_date = None
|
|
for message in self._get_messages():
|
|
date = datetime.fromtimestamp(message[1])
|
|
date_day = date.strftime('%Y/%m/%d')
|
|
if date_day != curr_date:
|
|
messages[date_day] = []
|
|
curr_date = date_day
|
|
mess_dict = self.get_message_meta(message[0], parent=True, mess_datetime=date)
|
|
messages[date_day].append(mess_dict)
|
|
return messages
|
|
|
|
# Zset with ID ??? id -> item id ??? multiple id == media + text
|
|
# id -> media id
|
|
# How do we handle reply/thread ??? -> separate with new chats name/id ZSET ???
|
|
# Handle media ???
|
|
|
|
# list of message id -> obj_id
|
|
# list of obj_id ->
|
|
# abuse parent children ???
|
|
|
|
# def add(self, timestamp, obj_id, mess_id=0, username=None, user_id=None):
|
|
# date = # TODO get date from object
|
|
# self.update_daterange(date)
|
|
# update_obj_date(date, self.type, self.subtype)
|
|
#
|
|
#
|
|
# # daily
|
|
# r_object.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
|
|
# # all subtypes
|
|
# r_object.zincrby(f'{self.type}_all:{self.subtype}', 1, self.id)
|
|
#
|
|
# #######################################################################
|
|
# #######################################################################
|
|
#
|
|
# # Correlations
|
|
# self.add_correlation('item', '', item_id)
|
|
# # domain
|
|
# if is_crawled(item_id):
|
|
# domain = get_item_domain(item_id)
|
|
# self.add_correlation('domain', '', domain)
|
|
|
|
# TODO kvrocks exception if key don't exists
|
|
def get_obj_by_message_id(self, mess_id):
|
|
return r_object.hget(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id)
|
|
|
|
# importer -> use cache for previous reply SET to_add_id: previously_imported : expire SET key -> 30 mn
|
|
def add_message(self, obj_global_id, timestamp, mess_id, reply_id=None):
|
|
r_object.hset(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id, obj_global_id)
|
|
r_object.zadd(f'messages:{self.type}:{self.subtype}:{self.id}', {obj_global_id: timestamp})
|
|
|
|
if reply_id:
|
|
reply_obj = self.get_obj_by_message_id(reply_id)
|
|
if reply_obj:
|
|
self.add_obj_children(reply_obj, obj_global_id)
|
|
else:
|
|
self.add_message_cached_reply(reply_id, mess_id)
|
|
|
|
# ADD cached replies
|
|
for reply_obj in self.get_cached_message_reply(mess_id):
|
|
self.add_obj_children(obj_global_id, reply_obj)
|
|
|
|
def _get_message_cached_reply(self, message_id):
|
|
return r_cache.smembers(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{message_id}')
|
|
|
|
def get_cached_message_reply(self, message_id):
|
|
objs_global_id = []
|
|
for mess_id in self._get_message_cached_reply(message_id):
|
|
obj_global_id = self.get_obj_by_message_id(mess_id)
|
|
if obj_global_id:
|
|
objs_global_id.append(obj_global_id)
|
|
return objs_global_id
|
|
|
|
def add_message_cached_reply(self, reply_to_id, message_id):
|
|
r_cache.sadd(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', message_id)
|
|
r_cache.expire(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', 600)
|
|
|
|
# TODO nb replies = nb son ???? what if it create a onion item ??? -> need source filtering
|
|
|
|
|
|
# TODO factorize
|
|
def get_all_subtypes():
|
|
return ail_core.get_object_all_subtypes('chat')
|
|
|
|
def get_all():
|
|
objs = {}
|
|
for subtype in get_all_subtypes():
|
|
objs[subtype] = get_all_by_subtype(subtype)
|
|
return objs
|
|
|
|
def get_all_by_subtype(subtype):
|
|
return get_all_id('chat', subtype)
|
|
|
|
# # TODO FILTER NAME + Key + mail
|
|
# def sanitize_username_name_to_search(name_to_search, subtype): # TODO FILTER NAME
|
|
#
|
|
# return name_to_search
|
|
#
|
|
# def search_usernames_by_name(name_to_search, subtype, r_pos=False):
|
|
# usernames = {}
|
|
# # for subtype in subtypes:
|
|
# r_name = sanitize_username_name_to_search(name_to_search, subtype)
|
|
# if not name_to_search or isinstance(r_name, dict):
|
|
# # break
|
|
# return usernames
|
|
# r_name = re.compile(r_name)
|
|
# for user_name in get_all_usernames_by_subtype(subtype):
|
|
# res = re.search(r_name, user_name)
|
|
# if res:
|
|
# usernames[user_name] = {}
|
|
# if r_pos:
|
|
# usernames[user_name]['hl-start'] = res.start()
|
|
# usernames[user_name]['hl-end'] = res.end()
|
|
# return usernames
|
|
|
|
|
|
if __name__ == '__main__':
|
|
chat = Chat('test', 'telegram')
|
|
r = chat.get_messages()
|
|
print(r)
|