mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
						commit
						9d481bd0b0
					
				|  | @ -206,8 +206,7 @@ class AbstractChatFeeder(DefaultFeeder, ABC): | |||
|         subchannel = ChatSubChannels.ChatSubChannel(f'{self.get_chat_id()}/{meta["id"]}', self.get_chat_instance_uuid()) | ||||
|         thread = None | ||||
| 
 | ||||
|         # TODO correlation with obj = message/image | ||||
|         subchannel.add(date) | ||||
|         subchannel.add(date, obj) | ||||
| 
 | ||||
|         if meta.get('date'): # TODO check if already exists | ||||
|             subchannel.set_created_at(int(meta['date']['timestamp'])) | ||||
|  | @ -358,7 +357,58 @@ class AbstractChatFeeder(DefaultFeeder, ABC): | |||
|             # CHAT | ||||
|             chat_objs = self.process_chat(new_objs, obj, date, timestamp, reply_id=reply_id) | ||||
| 
 | ||||
|             # Message forward | ||||
|             # # TODO HANDLE OTHERS OBJECT TYPE | ||||
|             # # TODO MAKE IT GENERIC FOR OTHERS CHATS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | ||||
|             # # Message forward + Discussion | ||||
|             # if self.get_json_meta().get('forward'): | ||||
|             #     discussion_id = self.get_json_meta().get('discussion') | ||||
|             #     forward_from = self.get_message_forward() | ||||
|             # | ||||
|             #     if discussion_id:       # TODO HANDLE FORWARDED MESSAGES FROM EXTERNAL CHANNELS | ||||
|             #         chat_forward_id = forward_from['from']['id'] | ||||
|             #         message_forward_id = forward_from['from']['channel_post'] | ||||
|             # | ||||
|             #         # if chat_forward_id == discussion_id: | ||||
|             #         #     linked_chat = Chat(chat_forward_id, self.get_chat_instance_uuid()) | ||||
|             #         #     if linked_chat.exists(): | ||||
|             #         #         # create thread | ||||
|             #         #         # add message replies for each childrens | ||||
|             # | ||||
|             # # TODO HANDLE THREAD | ||||
|             # # TODO Change FORWARD META FIELDS | ||||
|             # # meta['forward'] = {} | ||||
|             # #       # CHAT ID | ||||
|             # #       # SUBCHANNEL ID     -> can be None | ||||
|             # #       # Message ID | ||||
|             # | ||||
|             # # meta['forward']['origin'] | ||||
|             # #       # same as 'forward' | ||||
|             # | ||||
|             # if self.get_json_meta().get('forward'): | ||||
|             #     forward = self.get_message_forward() | ||||
|             #     f_chat = forward['chat'] | ||||
|             #     f_subchannel = forward.get('subchannel') | ||||
|             #     f_id = forward.get('id') | ||||
|             #     if not f_subchannel: | ||||
|             #         chat_forward = Chat(f_chat, self.get_chat_instance_uuid()) | ||||
|             #         if chat_forward.exists(): | ||||
|             #             for chat_obj in chat_objs: | ||||
|             #                 if chat_obj.type == 'chat': | ||||
|             #                     chat_forward.add_relationship(chat_obj.get_global_id(), 'forward') | ||||
|             #             # TODO LIST FORWARDED MESSAGES | ||||
|             # | ||||
|             # | ||||
|             # # Discord -> serverID + subchannel ID + message ID | ||||
|             # # Telegram -> chat ID + Message ID | ||||
|             # #                 + ORIGIN IDs | ||||
|             # | ||||
|             # | ||||
|             # | ||||
|             # # TODO create relationships graph | ||||
|             # | ||||
|             # | ||||
|             # # TODO REMOVE ME | ||||
|             # # Message forward  # TODO handle subchannel + message ID | ||||
|             # if self.get_json_meta().get('forward'): | ||||
|             #     forward_from = self.get_message_forward() | ||||
|             #     print('-----------------------------------------------------------') | ||||
|  |  | |||
|  | @ -7,6 +7,7 @@ import sys | |||
| import html2text | ||||
| 
 | ||||
| import gcld3 | ||||
| from lexilang.detector import detect as lexilang_detect | ||||
| from libretranslatepy import LibreTranslateAPI | ||||
| 
 | ||||
| sys.path.append(os.environ['AIL_BIN']) | ||||
|  | @ -264,7 +265,10 @@ def _get_html2text(content, ignore_links=False): | |||
|     h = html2text.HTML2Text() | ||||
|     h.ignore_links = ignore_links | ||||
|     h.ignore_images = ignore_links | ||||
|     return h.handle(content) | ||||
|     content = h.handle(content) | ||||
|     if content == '\n\n': | ||||
|         content = '' | ||||
|     return content | ||||
| 
 | ||||
| def _clean_text_to_translate(content, html=False, keys_blocks=True): | ||||
|     if html: | ||||
|  | @ -323,67 +327,105 @@ def get_objs_languages(obj_type, obj_subtype=''): | |||
| def get_obj_languages(obj_type, obj_subtype, obj_id): | ||||
|     return r_lang.smembers(f'obj:lang:{obj_type}:{obj_subtype}:{obj_id}') | ||||
| 
 | ||||
| def get_obj_language_stats(obj_type, obj_subtype, obj_id): | ||||
|     return r_lang.zrange(f'obj:langs:stat:{obj_type}:{obj_subtype}:{obj_id}', 0, -1, withscores=True) | ||||
| 
 | ||||
| # TODO ADD language to CHAT GLOBAL SET | ||||
| def add_obj_language(language, obj_type, obj_subtype, obj_id):  # (s) | ||||
| def add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()):  # (s) | ||||
|     if not obj_subtype: | ||||
|         obj_subtype = '' | ||||
|     obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}' | ||||
| 
 | ||||
|     r_lang.sadd(f'objs:langs:{obj_type}', language) | ||||
|     r_lang.sadd(f'objs:lang:{obj_type}:{obj_subtype}', language) | ||||
|     r_lang.sadd(f'obj:lang:{obj_global_id}', language) | ||||
|     new = r_lang.sadd(f'obj:lang:{obj_global_id}', language) | ||||
| 
 | ||||
|     r_lang.sadd(f'languages:{language}', f'{obj_type}:{obj_subtype}') ################### REMOVE ME ??? | ||||
|     r_lang.sadd(f'langs:{obj_type}:{obj_subtype}:{language}', obj_global_id) | ||||
| 
 | ||||
| def remove_obj_language(language, obj_type, obj_subtype, obj_id): | ||||
|     if new: | ||||
|         for global_id in objs_containers: | ||||
|             r_lang.zincrby(f'obj:langs:stat:{global_id}', 1, language) | ||||
| 
 | ||||
| 
 | ||||
| def remove_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()): | ||||
|     if not obj_subtype: | ||||
|         obj_subtype = '' | ||||
|     obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}' | ||||
|     r_lang.srem(f'obj:lang:{obj_global_id}', language) | ||||
|     rem = r_lang.srem(f'obj:lang:{obj_global_id}', language) | ||||
| 
 | ||||
|     delete_obj_translation(obj_global_id, language) | ||||
| 
 | ||||
|     r_lang.srem(f'langs:{obj_type}:{obj_subtype}:{language}', obj_global_id) | ||||
|     if not r_lang.exists(f'langs:{obj_type}:{obj_subtype}:{language}'): | ||||
|         r_lang.srem(f'objs:lang:{obj_type}:{obj_subtype}', language) | ||||
|         r_lang.srem(f'languages:{language}', f'{obj_type}:{obj_subtype}') | ||||
|         if not r_lang.exists(f'objs:lang:{obj_type}:{obj_subtype}'): | ||||
|             if r_lang.scard(f'objs:langs:{obj_type}', language) <= 1: | ||||
|             if r_lang.scard(f'objs:langs:{obj_type}') <= 1: | ||||
|                 r_lang.srem(f'objs:langs:{obj_type}', language) | ||||
| 
 | ||||
| def edit_obj_language(language, obj_type, obj_subtype, obj_id): | ||||
|     remove_obj_language(language, obj_type, obj_subtype, obj_id) | ||||
|     add_obj_language(language, obj_type, obj_subtype, obj_id) | ||||
|     if rem: | ||||
|         for global_id in objs_containers: | ||||
|             r = r_lang.zincrby(f'obj:langs:stat:{global_id}', -1, language) | ||||
|             if r < 1: | ||||
|                 r_lang.zrem(f'obj:langs:stat:{global_id}', language) | ||||
| 
 | ||||
| # TODO handle fields | ||||
| def detect_obj_language(obj_type, obj_subtype, obj_id, content, objs_containers=set()): | ||||
|     detector = LanguagesDetector(nb_langs=1) | ||||
|     language = detector.detect(content) | ||||
|     if language: | ||||
|         language = language[0] | ||||
|         previous_lang = get_obj_languages(obj_type, obj_subtype, obj_id) | ||||
|         if previous_lang: | ||||
|             previous_lang = previous_lang.pop() | ||||
|             if language != previous_lang: | ||||
|                 remove_obj_language(previous_lang, obj_type, obj_subtype, obj_id, objs_containers=objs_containers) | ||||
|                 add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers) | ||||
|         else: | ||||
|             add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers) | ||||
|         return language | ||||
| 
 | ||||
| ## Translation | ||||
| def _get_obj_translation(obj_global_id, language, field=''): | ||||
| def r_get_obj_translation(obj_global_id, language, field=''): | ||||
|     return r_lang.hget(f'tr:{obj_global_id}:{field}', language) | ||||
| 
 | ||||
| def get_obj_translation(obj_global_id, language, source=None, content=None, field=''): | ||||
| def _get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()): | ||||
|     """ | ||||
|         Returns translated content | ||||
|     """ | ||||
|     translation = r_cache.get(f'translation:{language}:{obj_global_id}:{field}') | ||||
|     r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 0) | ||||
|     if translation: | ||||
|         # DEBUG | ||||
|         # print('cache') | ||||
|         # r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 0) | ||||
|         return translation | ||||
|     # TODO HANDLE FIELDS TRANSLATION | ||||
|     translation = _get_obj_translation(obj_global_id, language, field=field) | ||||
|     translation = r_get_obj_translation(obj_global_id, language, field=field) | ||||
|     if not translation: | ||||
|         translation = LanguageTranslator().translate(content, source=source, target=language) | ||||
|         source, translation = LanguageTranslator().translate(content, source=source, target=language) | ||||
|         if source: | ||||
|             obj_type, subtype, obj_id = obj_global_id.split(':', 2) | ||||
|             add_obj_language(source, obj_type, subtype, obj_id, objs_containers=objs_containers) | ||||
|     if translation: | ||||
|         r_cache.set(f'translation:{language}:{obj_global_id}:{field}', translation) | ||||
|         r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 300) | ||||
|     return translation | ||||
| 
 | ||||
| def get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()): | ||||
|     return _get_obj_translation(obj_global_id, language, source=source, content=content, field=field, objs_containers=objs_containers) | ||||
| 
 | ||||
| 
 | ||||
| # TODO Force to edit ???? | ||||
| 
 | ||||
| def set_obj_translation(obj_global_id, language, translation, field=''): | ||||
|     r_cache.delete(f'translation:{language}:{obj_global_id}:') | ||||
|     return r_lang.hset(f'tr:{obj_global_id}:{field}', language, translation) | ||||
| 
 | ||||
| def delete_obj_translation(obj_global_id, language, field=''): | ||||
|     r_cache.delete(f'translation:{language}:{obj_global_id}:') | ||||
|     r_lang.hdel(f'tr:{obj_global_id}:{field}', language) | ||||
| 
 | ||||
| ## --LANGUAGE ENGINE-- ## | ||||
| 
 | ||||
|  | @ -410,11 +452,22 @@ class LanguagesDetector: | |||
|         if self.min_len > 0: | ||||
|             if len(content) < self.min_len: | ||||
|                 return languages | ||||
|         # p = self.detector.FindTopNMostFreqLangs(content, num_langs=3) | ||||
|         # for lang in p: | ||||
|         #     print(lang.language, lang.probability, lang.proportion, lang.is_reliable) | ||||
|         # print('------------------------------------------------') | ||||
|         for lang in self.detector.FindTopNMostFreqLangs(content, num_langs=self.nb_langs): | ||||
|             if lang.proportion >= self.min_proportion and lang.probability >= self.min_probability and lang.is_reliable: | ||||
|                 languages.append(lang.language) | ||||
|         return languages | ||||
| 
 | ||||
|     def detect_lexilang(self, content):  # TODO clean text ??? - TODO REMOVE SEPARATOR | ||||
|         language, prob = lexilang_detect(content) | ||||
|         if prob > 0: | ||||
|             return [language] | ||||
|         else: | ||||
|             return [] | ||||
| 
 | ||||
|     def detect_libretranslate(self, content): | ||||
|         languages = [] | ||||
|         try: | ||||
|  | @ -431,19 +484,35 @@ class LanguagesDetector: | |||
|                     languages.append(language) | ||||
|         return languages | ||||
| 
 | ||||
|     def detect(self, content, force_gcld3=False): | ||||
|     def detect(self, content, force_gcld3=False):  # TODO detect length between 20-200 ???? | ||||
|         if not content: | ||||
|             return None | ||||
|         content = _clean_text_to_translate(content, html=True) | ||||
|         if not content: | ||||
|             return None | ||||
|         # DEBUG | ||||
|         # print('-------------------------------------------------------') | ||||
|         # print(content) | ||||
|         # print(len(content)) | ||||
|         # lexilang | ||||
|         if len(content) < 150: | ||||
|             # print('lexilang') | ||||
|             languages = self.detect_lexilang(content) | ||||
|         # gcld3 | ||||
|         if len(content) >= 200 or not self.lt or force_gcld3: | ||||
|             language = self.detect_gcld3(content) | ||||
|         # libretranslate | ||||
|         else: | ||||
|             language = self.detect_libretranslate(content) | ||||
|         return language | ||||
|             # if len(content) >= 200 or not self.lt or force_gcld3: | ||||
|             # print('gcld3') | ||||
|             languages = self.detect_gcld3(content) | ||||
|         # libretranslate | ||||
|         # else: | ||||
|         #     languages = self.detect_libretranslate(content) | ||||
|         return languages | ||||
| 
 | ||||
| class LanguageTranslator: | ||||
| 
 | ||||
|     def __init__(self): | ||||
|         self.lt = LibreTranslateAPI(get_translator_instance()) | ||||
|         self.ld = LanguagesDetector(nb_langs=1) | ||||
| 
 | ||||
|     def languages(self): | ||||
|         languages = [] | ||||
|  | @ -473,13 +542,13 @@ class LanguageTranslator: | |||
|             return language[0].get('language') | ||||
| 
 | ||||
|     def detect(self, content): | ||||
|         # gcld3 | ||||
|         if len(content) >= 200: | ||||
|             language = self.detect_gcld3(content) | ||||
|         # libretranslate | ||||
|         else: | ||||
|             language = self.detect_libretranslate(content) | ||||
|         return language | ||||
|         # print('++++++++++++++++++++++++++++++++++++++++++++++++++++++') | ||||
|         # print(content) | ||||
|         language = self.ld.detect(content) | ||||
|         if language: | ||||
|             # print(language[0]) | ||||
|             # print('##############################################################') | ||||
|             return language[0] | ||||
| 
 | ||||
|     def translate(self, content, source=None, target="en"):  # TODO source target | ||||
|         if target not in get_translation_languages(): | ||||
|  | @ -498,9 +567,9 @@ class LanguageTranslator: | |||
|                         translation = None | ||||
|                     # TODO LOG and display error | ||||
|                     if translation == content: | ||||
|                         print('EQUAL') | ||||
|                         # print('EQUAL') | ||||
|                         translation = None | ||||
|         return translation | ||||
|         return source, translation | ||||
| 
 | ||||
| 
 | ||||
| LIST_LANGUAGES = {} | ||||
|  |  | |||
|  | @ -323,7 +323,6 @@ def get_username_meta_from_global_id(username_global_id): | |||
|     username = Usernames.Username(username_id, instance_uuid) | ||||
|     return username.get_meta() | ||||
| 
 | ||||
| 
 | ||||
| # TODO Filter | ||||
| ## Instance type | ||||
| ## Chats IDS | ||||
|  | @ -380,6 +379,22 @@ def get_nb_messages_iterator(filters={}): | |||
|             nb_messages += chat.get_nb_messages() | ||||
|     return nb_messages | ||||
| 
 | ||||
| 
 | ||||
| #### FIX #### | ||||
| 
 | ||||
| def fix_correlations_subchannel_message(): | ||||
|     for instance_uuid in get_chat_service_instances(): | ||||
|         for chat_id in ChatServiceInstance(instance_uuid).get_chats(): | ||||
|             chat = Chats.Chat(chat_id, instance_uuid) | ||||
|             # subchannels | ||||
|             for subchannel_gid in chat.get_subchannels(): | ||||
|                 _, _, subchannel_id = subchannel_gid.split(':', 2) | ||||
|                 subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, instance_uuid) | ||||
|                 messages, _ = subchannel._get_messages(nb=-1) | ||||
|                 for mess in messages: | ||||
|                     _, _, message_id = mess[0].split(':', ) | ||||
|                     subchannel.add_correlation('message', '', message_id) | ||||
| 
 | ||||
| #### API #### | ||||
| 
 | ||||
| def api_get_chat_service_instance(chat_instance_uuid): | ||||
|  | @ -392,6 +407,7 @@ def api_get_chat(chat_id, chat_instance_uuid, translation_target=None, nb=-1, pa | |||
|     chat = Chats.Chat(chat_id, chat_instance_uuid) | ||||
|     if not chat.exists(): | ||||
|         return {"status": "error", "reason": "Unknown chat"}, 404 | ||||
|     # print(chat.get_obj_language_stats()) | ||||
|     meta = chat.get_meta({'created_at', 'icon', 'info', 'nb_participants', 'subchannels', 'threads', 'translation', 'username'}, translation_target=translation_target) | ||||
|     if meta['username']: | ||||
|         meta['username'] = get_username_meta_from_global_id(meta['username']) | ||||
|  | @ -437,6 +453,7 @@ def api_get_subchannel(chat_id, chat_instance_uuid, translation_target=None, nb= | |||
|     subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid) | ||||
|     if not subchannel.exists(): | ||||
|         return {"status": "error", "reason": "Unknown subchannel"}, 404 | ||||
|     # print(subchannel.get_obj_language_stats()) | ||||
|     meta = subchannel.get_meta({'chat', 'created_at', 'icon', 'nb_messages', 'nb_participants', 'threads', 'translation'}, translation_target=translation_target) | ||||
|     if meta['chat']: | ||||
|         meta['chat'] = get_chat_meta_from_global_id(meta['chat']) | ||||
|  | @ -451,6 +468,7 @@ def api_get_thread(thread_id, thread_instance_uuid, translation_target=None, nb= | |||
|     thread = ChatThreads.ChatThread(thread_id, thread_instance_uuid) | ||||
|     if not thread.exists(): | ||||
|         return {"status": "error", "reason": "Unknown thread"}, 404 | ||||
|     # print(thread.get_obj_language_stats()) | ||||
|     meta = thread.get_meta({'chat', 'nb_messages', 'nb_participants'}) | ||||
|     # if meta['chat']: | ||||
|     #     meta['chat'] = get_chat_meta_from_global_id(meta['chat']) | ||||
|  | @ -461,18 +479,32 @@ def api_get_message(message_id, translation_target=None): | |||
|     message = Messages.Message(message_id) | ||||
|     if not message.exists(): | ||||
|         return {"status": "error", "reason": "Unknown uuid"}, 404 | ||||
|     meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target) | ||||
|     meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target) | ||||
|     return meta, 200 | ||||
| 
 | ||||
| def api_manually_translate_message(message_id, translation_target, translation): | ||||
| def api_message_detect_language(message_id): | ||||
|     message = Messages.Message(message_id) | ||||
|     if not message.exists(): | ||||
|         return {"status": "error", "reason": "Unknown uuid"}, 404 | ||||
|     lang = message.detect_language() | ||||
|     return {"language": lang}, 200 | ||||
| 
 | ||||
| def api_manually_translate_message(message_id, source, translation_target, translation): | ||||
|     message = Messages.Message(message_id) | ||||
|     if not message.exists(): | ||||
|         return {"status": "error", "reason": "Unknown uuid"}, 404 | ||||
|     if len(translation) > 200000: # TODO REVIEW LIMIT | ||||
|         return {"status": "error", "reason": "Max Size reached"}, 400 | ||||
|     if translation_target not in Language.get_translation_languages(): | ||||
|         return {"status": "error", "reason": "Unknown Language"}, 400 | ||||
|     if translation: | ||||
|         if len(translation) > 200000: # TODO REVIEW LIMIT | ||||
|             return {"status": "error", "reason": "Max Size reached"}, 400 | ||||
|     all_languages = Language.get_translation_languages() | ||||
|     if source not in all_languages: | ||||
|         return {"status": "error", "reason": "Unknown source Language"}, 400 | ||||
|     message_language = message.get_language() | ||||
|     if message_language != source: | ||||
|         message.edit_language(message_language, source) | ||||
|     if translation: | ||||
|         if translation_target not in all_languages: | ||||
|             return {"status": "error", "reason": "Unknown target Language"}, 400 | ||||
|         message.set_translation(translation_target, translation) | ||||
|     # TODO SANITYZE translation | ||||
|     return None, 200 | ||||
|  |  | |||
|  | @ -107,9 +107,24 @@ class Message(AbstractObject): | |||
|         return message_id | ||||
| 
 | ||||
|     def get_chat_id(self):  # TODO optimize -> use me to tag Chat | ||||
|         chat_id = self.get_basename().rsplit('_', 1)[0] | ||||
|         return chat_id | ||||
|         c_id = self.id.split('/') | ||||
|         return c_id[2] | ||||
| 
 | ||||
|     def get_chat(self): | ||||
|         c_id = self.id.split('/') | ||||
|         return f'chat:{c_id[0]}:{c_id[2]}' | ||||
| 
 | ||||
|     def get_subchannel(self): | ||||
|         subchannel = self.get_correlation('chat-subchannel') | ||||
|         if subchannel.get('chat-subchannel'): | ||||
|             return f'chat-subchannel:{subchannel["chat-subchannel"].pop()}' | ||||
| 
 | ||||
|     def get_current_thread(self): | ||||
|         subchannel = self.get_correlation('chat-thread') | ||||
|         if subchannel.get('chat-thread'): | ||||
|             return f'chat-thread:{subchannel["chat-thread"].pop()}' | ||||
| 
 | ||||
|     # children thread | ||||
|     def get_thread(self): | ||||
|         for child in self.get_childrens(): | ||||
|             obj_type, obj_subtype, obj_id = child.split(':', 2) | ||||
|  | @ -180,24 +195,12 @@ class Message(AbstractObject): | |||
|     # message media | ||||
|     # flag is deleted -> event or missing from feeder pass ??? | ||||
| 
 | ||||
|     def get_translation(self, content=None, source=None, target='fr'): | ||||
|         """ | ||||
|         Returns translated content | ||||
|         """ | ||||
| 
 | ||||
|         # return self._get_field('translated') | ||||
|         global_id = self.get_global_id() | ||||
|         translation = r_cache.get(f'translation:{target}:{global_id}') | ||||
|         r_cache.expire(f'translation:{target}:{global_id}', 0) | ||||
|         if translation: | ||||
|             return translation | ||||
|         if not content: | ||||
|             content = self.get_content() | ||||
|         translation = Language.LanguageTranslator().translate(content, source=source, target=target) | ||||
|         if translation: | ||||
|             r_cache.set(f'translation:{target}:{global_id}', translation) | ||||
|             r_cache.expire(f'translation:{target}:{global_id}', 300) | ||||
|         return translation | ||||
|     def get_language(self): | ||||
|         languages = self.get_languages() | ||||
|         if languages: | ||||
|             return languages.pop() | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
|     def _set_translation(self, translation): | ||||
|         """ | ||||
|  | @ -296,8 +299,16 @@ class Message(AbstractObject): | |||
|             meta['files-names'] = self.get_files_names() | ||||
|         if 'reactions' in options: | ||||
|             meta['reactions'] = self.get_reactions() | ||||
|         if 'language' in options: | ||||
|             meta['language'] = self.get_language() | ||||
|         if 'translation' in options and translation_target: | ||||
|             meta['translation'] = self.translate(content=meta.get('content'), target=translation_target) | ||||
|             if meta.get('language'): | ||||
|                 source = meta['language'] | ||||
|             else: | ||||
|                 source = None | ||||
|             meta['translation'] = self.translate(content=meta.get('content'), source=source, target=translation_target) | ||||
|             if 'language' in options: | ||||
|                 meta['language'] = self.get_language() | ||||
| 
 | ||||
|         # meta['encoding'] = None | ||||
|         return meta | ||||
|  | @ -311,11 +322,29 @@ class Message(AbstractObject): | |||
|     #     self._set_translation(translated) | ||||
|     #     return translated | ||||
| 
 | ||||
|     def create(self, content, translation=None, tags=[]): | ||||
|     ## Language ## | ||||
| 
 | ||||
|     def get_objs_container(self): | ||||
|         objs_containers = set() | ||||
|         # chat | ||||
|         objs_containers.add(self.get_chat()) | ||||
|         subchannel = self.get_subchannel() | ||||
|         if subchannel: | ||||
|             objs_containers.add(subchannel) | ||||
|         thread = self.get_current_thread() | ||||
|         if thread: | ||||
|             objs_containers.add(thread) | ||||
|         return objs_containers | ||||
| 
 | ||||
|     #- Language -# | ||||
| 
 | ||||
|     def create(self, content, language=None, translation=None, tags=[]): | ||||
|         self._set_field('content', content) | ||||
|         # r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content) | ||||
|         if translation: | ||||
|         if not language and content: | ||||
|             language = self.detect_language() | ||||
|         if translation and content: | ||||
|             self._set_translation(translation) | ||||
|             self.set_translation(language, translation) | ||||
|         for tag in tags: | ||||
|             self.add_tag(tag) | ||||
| 
 | ||||
|  |  | |||
|  | @ -226,7 +226,7 @@ class AbstractChatObject(AbstractSubtypeObject, ABC): | |||
|     def get_message_meta(self, message, timestamp=None, translation_target='', options=None):  # TODO handle file message | ||||
|         message = Messages.Message(message[9:]) | ||||
|         if not options: | ||||
|             options = {'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'} | ||||
|             options = {'content', 'files-names', 'images', 'language', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'} | ||||
|         meta = message.get_meta(options=options, timestamp=timestamp, translation_target=translation_target) | ||||
|         return meta | ||||
| 
 | ||||
|  |  | |||
|  | @ -25,7 +25,7 @@ from lib import Duplicate | |||
| from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation | ||||
| from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations | ||||
| from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship | ||||
| from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, get_obj_translation, set_obj_translation | ||||
| from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, detect_obj_language, get_obj_language_stats, get_obj_translation, set_obj_translation, delete_obj_translation | ||||
| from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers | ||||
| 
 | ||||
| logging.config.dictConfig(ail_logger.get_config(name='ail')) | ||||
|  | @ -305,28 +305,45 @@ class AbstractObject(ABC): | |||
| 
 | ||||
|     ## -Relationship- ## | ||||
| 
 | ||||
|     def get_objs_container(self): | ||||
|         return set() | ||||
| 
 | ||||
|     ## Language ## | ||||
| 
 | ||||
|     def get_languages(self): | ||||
|         return get_obj_languages(self.type, self.get_subtype(r_str=True), self.id) | ||||
| 
 | ||||
|     def add_language(self, language): | ||||
|         return add_obj_language(language, self.type, self.get_subtype(r_str=True), self.id) | ||||
|         return add_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container()) | ||||
| 
 | ||||
|     def remove_language(self, language): | ||||
|         return remove_obj_language(language, self.type, self.get_subtype(r_str=True), self.id) | ||||
|         return remove_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container()) | ||||
| 
 | ||||
|     def edit_language(self, old_language, new_language): | ||||
|         if old_language: | ||||
|             self.remove_language(old_language) | ||||
|         self.add_language(new_language) | ||||
| 
 | ||||
|     def detect_language(self, field=''): | ||||
|         return detect_obj_language(self.type, self.get_subtype(r_str=True), self.id, self.get_content(), objs_containers=self.get_objs_container()) | ||||
| 
 | ||||
|     def get_obj_language_stats(self): | ||||
|         return get_obj_language_stats(self.type, self.get_subtype(r_str=True), self.id) | ||||
| 
 | ||||
|     def get_translation(self, language, field=''): | ||||
|         return get_obj_translation(self.get_global_id(), language, field=field) | ||||
|         return get_obj_translation(self.get_global_id(), language, field=field, objs_containers=self.get_objs_container()) | ||||
| 
 | ||||
|     def set_translation(self, language, translation, field=''): | ||||
|         return set_obj_translation(self.get_global_id(), language, translation, field=field) | ||||
| 
 | ||||
|     def delete_translation(self, language, field=''): | ||||
|         return delete_obj_translation(self.get_global_id(), language, field=field) | ||||
| 
 | ||||
|     def translate(self, content=None, field='', source=None, target='en'): | ||||
|         global_id = self.get_global_id() | ||||
|         if not content: | ||||
|             content = self.get_content() | ||||
|         translation = get_obj_translation(global_id, target, source=source, content=content, field=field) | ||||
|         translation = get_obj_translation(global_id, target, source=source, content=content, field=field, objs_containers=self.get_objs_container()) | ||||
|         return translation | ||||
| 
 | ||||
|     ## -Language- ## | ||||
|  |  | |||
|  | @ -33,6 +33,10 @@ class Languages(AbstractModule): | |||
|                 for lang in obj.get_languages(min_probability=0.8, force_gcld3=True): | ||||
|                     print(lang) | ||||
|                     domain.add_language(lang) | ||||
|         # Detect Chat Message Language | ||||
|         # elif obj.type == 'message': | ||||
|         #     lang = obj.detect_language() | ||||
|         #     print(self.obj.id, lang) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|  |  | |||
|  | @ -20,17 +20,39 @@ from lib.ail_core import is_object_type | |||
| from lib import ail_queues | ||||
| from lib.objects import ail_objects | ||||
| 
 | ||||
| def reprocess_message_objects(object_type): | ||||
|     queue = ail_queues.AILQueue('FeederModuleImporter', -1) | ||||
|     for obj in ail_objects.obj_iterator(object_type, filters={}): | ||||
|         queue.send_message(obj.get_global_id(), message='reprocess') | ||||
|     queue.end() | ||||
| # from modules.ApiKey import ApiKey | ||||
| # from modules.Categ import Categ | ||||
| # from modules.CreditCards import CreditCards | ||||
| # from modules.DomClassifier import DomClassifier | ||||
| # from modules.Global import Global | ||||
| # from modules.Keys import Keys | ||||
| # from modules.Onion import Onion | ||||
| # from modules.Telegram import Telegram | ||||
| 
 | ||||
| from modules.Languages import Languages | ||||
| 
 | ||||
| MODULES = { | ||||
|     'Languages': Languages | ||||
| } | ||||
| 
 | ||||
| def reprocess_message_objects(object_type, module_name=None): | ||||
|     if module_name: | ||||
|         module = MODULES[module_name]() | ||||
|         for obj in ail_objects.obj_iterator(object_type, filters={}): | ||||
|             module.obj = obj | ||||
|             module.compute(None) | ||||
|     else: | ||||
|         queue = ail_queues.AILQueue('FeederModuleImporter', -1) | ||||
|         for obj in ail_objects.obj_iterator(object_type, filters={}): | ||||
|             queue.send_message(obj.get_global_id(), message='reprocess') | ||||
|         queue.end() | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
| 
 | ||||
|     parser = argparse.ArgumentParser(description='Reprocess AIL Objects') | ||||
|     parser.add_argument('-t', '--type', type=str, help='AIL Object Type', required=True) | ||||
|     parser.add_argument('-m', '--module', type=str, help='AIL Module Name') | ||||
| 
 | ||||
|     args = parser.parse_args() | ||||
|     if not args.type: | ||||
|  | @ -43,4 +65,7 @@ if __name__ == "__main__": | |||
|     if obj_type not in ['item', 'message']:  # TODO image | ||||
|         raise Exception(f'Currently not supported Object Type: {obj_type}') | ||||
| 
 | ||||
|     reprocess_message_objects(obj_type) | ||||
|     modulename = args.module | ||||
|     if modulename not in MODULES: | ||||
|         raise Exception(f'Currently not supported Module: {modulename}') | ||||
|     reprocess_message_objects(obj_type, module_name=modulename) | ||||
|  |  | |||
|  | @ -10,6 +10,7 @@ sys.path.append(os.environ['AIL_HOME']) | |||
| ################################## | ||||
| from update.bin.ail_updater import AIL_Updater | ||||
| from lib import ail_updates | ||||
| from lib import chats_viewer | ||||
| 
 | ||||
| class Updater(AIL_Updater): | ||||
|     """default Updater.""" | ||||
|  | @ -19,6 +20,7 @@ class Updater(AIL_Updater): | |||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     chats_viewer.fix_correlations_subchannel_message() | ||||
|     updater = Updater('v5.4') | ||||
|     updater.run_update() | ||||
| 
 | ||||
|  |  | |||
|  | @ -23,6 +23,7 @@ git submodule update | |||
| echo "" | ||||
| echo -e $GREEN"Updating python packages ..."$DEFAULT | ||||
| echo "" | ||||
| pip install -U pyail | ||||
| pip install -U pylacus | ||||
| pip install -U git+https://github.com/ail-project/demoji | ||||
| pip install -U lexilang | ||||
|  |  | |||
|  | @ -244,15 +244,34 @@ def objects_message(): | |||
| @login_read_only | ||||
| def objects_message_translate(): | ||||
|     message_id = request.form.get('id') | ||||
|     source = request.form.get('language_target') | ||||
|     target = request.form.get('target') | ||||
|     translation = request.form.get('translation') | ||||
|     if target == "Don't Translate": | ||||
|         target = None | ||||
|     resp = chats_viewer.api_manually_translate_message(message_id, target, translation) | ||||
|     resp = chats_viewer.api_manually_translate_message(message_id, source, target, translation) | ||||
|     if resp[1] != 200: | ||||
|         return create_json_response(resp[0], resp[1]) | ||||
|     else: | ||||
|         return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target)) | ||||
|         if request.referrer: | ||||
|             return redirect(request.referrer) | ||||
|         else: | ||||
|             return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target)) | ||||
| 
 | ||||
| @chats_explorer.route("/objects/message/detect/language", methods=['GET']) | ||||
| @login_required | ||||
| @login_read_only | ||||
| def objects_message_detect_language(): | ||||
|     message_id = request.args.get('id') | ||||
|     target = request.args.get('target') | ||||
|     resp = chats_viewer.api_message_detect_language(message_id) | ||||
|     if resp[1] != 200: | ||||
|         return create_json_response(resp[0], resp[1]) | ||||
|     else: | ||||
|         if request.referrer: | ||||
|             return redirect(request.referrer) | ||||
|         else: | ||||
|             return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target)) | ||||
| 
 | ||||
| @chats_explorer.route("/objects/user-account", methods=['GET']) | ||||
| @login_required | ||||
|  |  | |||
|  | @ -81,24 +81,6 @@ | |||
|             <hr class="m-1"> | ||||
|             <pre class="my-0 text-secondary">{{ message['translation'] }}</pre> | ||||
| 
 | ||||
|             {% set mess_id_escape= message['id'] | replace("/", "_") %} | ||||
|             <button class="btn btn-light p-0" type="button" data-toggle="collapse" data-target="#collapseTrans{{ mess_id_escape }}" aria-expanded="false" aria-controls="collapseTrans{{ mess_id_escape }}"> | ||||
|                 <i class="fas fa-language"></i> | ||||
|             </button> | ||||
|             <div class="collapse" id="collapseTrans{{ mess_id_escape }}"> | ||||
|                 <div class="card card-body"> | ||||
|                     <form method="post" action="{{ url_for('chats_explorer.objects_message_translate') }}" target="_blank"> | ||||
|                         <input type="text" id="id" name="id" value="{{message['id']}}" hidden> | ||||
|                         <input type="text" id="target" name="target" value="{{translation_target}}" hidden> | ||||
|                         <span>{{translation_target}}:</span> | ||||
|                         <textarea class="form-control" id="translation" name="translation">{{ message['translation'] }}</textarea> | ||||
|                         <button class="btn btn-primary" type="submit"> | ||||
|                             <i class="fas fa-pen-alt"> Manual Translation</i> | ||||
|                         </button> | ||||
|                     </form> | ||||
|                 </div> | ||||
|             </div> | ||||
| 
 | ||||
|         {% endif %} | ||||
|         {% for reaction in message['reactions'] %} | ||||
|             <span class="border rounded px-1">{{ reaction }} {{ message['reactions'][reaction] }}</span> | ||||
|  | @ -113,10 +95,47 @@ | |||
|             <span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span> | ||||
|         {%  endfor %} | ||||
|         <div class=""> | ||||
| 
 | ||||
|             {% set mess_id_escape= message['id'] | replace("/", "_") %} | ||||
|             <span class="btn btn-outline-dark p-0 px-1" type="button" data-toggle="collapse" data-target="#collapseTrans{{ mess_id_escape }}" aria-expanded="false" aria-controls="collapseTrans{{ mess_id_escape }}"> | ||||
|                 <i class="fas fa-language"></i> {% if message['language'] %}{{ message['language'] }}{% endif %} | ||||
|             </span> | ||||
|             <div class="collapse" id="collapseTrans{{ mess_id_escape }}"> | ||||
|                 <div class="card card-body"> | ||||
|                     <form method="post" action="{{ url_for('chats_explorer.objects_message_translate') }}"> | ||||
|                         <input type="text" id="id" name="id" value="{{message['id']}}" hidden> | ||||
|                         <span class="badge badge-primary">Source:</span> | ||||
|                         <span class=""> | ||||
|                             <select id="language_target" name="language_target" class="form-select" aria-label="Message Language" onchange="$('#translation').val('');"> | ||||
|                                     <option selected value="{{ message['language'] }}">{{ message['language'] }}</option> | ||||
|                                 {% for language in translation_languages %} | ||||
|                                     <option value="{{ language }}">{{ translation_languages[language] }}</option> | ||||
|                                 {% endfor %} | ||||
|                             </select> | ||||
|                         </span> | ||||
|                         {% if translation_target %} | ||||
|                             <input type="text" id="target" name="target" value="{{translation_target}}" hidden> | ||||
|                                 <span class="badge badge-primary">Target:</span><span>{{translation_target}}</span> | ||||
|                             <textarea class="form-control" id="translation" name="translation">{{ message['translation'] }}</textarea> | ||||
|                             <button class="btn btn-dark" type="submit"> | ||||
|                                 <i class="fas fa-pen-alt"> Update Language or Translation</i> | ||||
|                             </button> | ||||
|                         {% else %} | ||||
|                             <button class="btn btn-dark" type="submit"> | ||||
|                                 <i class="fas fa-pen-alt"> Update Language</i> | ||||
|                             </button> | ||||
|                         {% endif %} | ||||
|                     </form> | ||||
|                     <div> | ||||
|                         <a class="btn btn-primary" href="{{ url_for('chats_explorer.objects_message_detect_language')}}?id={{ message['id'] }}"> | ||||
|                             <i class="fas fa-redo"></i> Detect Language | ||||
|                         </a> | ||||
|                     </div> | ||||
|                 </div> | ||||
|             </div> | ||||
| 
 | ||||
|             <a class="btn btn-light btn-sm text-secondary px-1" href="{{ url_for('correlation.show_correlation')}}?type={{ message['type'] }}&subtype={{ message['subtype'] }}&id={{ message['id'] }}"><i class="fas fa-project-diagram"></i></a> | ||||
|             <a class="btn btn-light btn-sm text-secondary px-1" href="{{ message['link'] }}"><i class="fas fa-eye"></i></a> | ||||
|         </div> | ||||
|     </div> | ||||
| </div> | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -10,6 +10,7 @@ | |||
|                         <option selected>Don't Translate</option> | ||||
|                     {% else %} | ||||
|                         <option selected value="{{ translation_target }}">{{ translation_target }}</option> | ||||
|                         <option>Don't Translate</option> | ||||
|                     {% endif %} | ||||
|                     {% for language in translation_languages %} | ||||
|                         <option value="{{ language }}">{{ translation_languages[language] }}</option> | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 terrtia
						terrtia