mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			chg: [chats] translate messages on demand
							parent
							
								
									941838ab76
								
							
						
					
					
						commit
						bef4e69a68
					
				|  | @ -4,6 +4,20 @@ | |||
| import os | ||||
| import sys | ||||
| 
 | ||||
| import cld3 | ||||
| from libretranslatepy import LibreTranslateAPI | ||||
| 
 | ||||
| sys.path.append(os.environ['AIL_BIN']) | ||||
| ################################## | ||||
| # Import Project packages | ||||
| ################################## | ||||
| from lib.ConfigLoader import ConfigLoader | ||||
| 
 | ||||
| config_loader = ConfigLoader() | ||||
| TRANSLATOR_URL = config_loader.get_config_str('Translation', 'libretranslate') | ||||
| config_loader = None | ||||
| 
 | ||||
| 
 | ||||
| dict_iso_languages = { | ||||
|     'af': 'Afrikaans', | ||||
|     'am': 'Amharic', | ||||
|  | @ -237,3 +251,77 @@ def get_iso_from_languages(l_languages, sort=False): | |||
|     if sort: | ||||
|         l_iso = sorted(l_iso) | ||||
|     return l_iso | ||||
| 
 | ||||
| 
 | ||||
| class LanguageDetector: | ||||
|     pass | ||||
| 
 | ||||
| def get_translator_instance(): | ||||
|     return TRANSLATOR_URL | ||||
| 
 | ||||
class LanguageTranslator:
    """Language detection and translation backed by a LibreTranslate instance.

    Calls to the remote service are best-effort: any regular error is swallowed
    and reported to the caller as an empty list or ``None``. Only ``Exception``
    subclasses are swallowed, so KeyboardInterrupt / SystemExit still propagate.
    """

    def __init__(self):
        self.lt = LibreTranslateAPI(get_translator_instance())

    def languages(self):
        """Return the languages exposed by the LibreTranslate instance.

        :return: list of ``{'iso': <code>, 'language': <name>}`` dicts; the
                 entries collected so far (possibly none) if the service fails.
        """
        languages = []
        try:
            for dict_lang in self.lt.languages():
                languages.append({'iso': dict_lang['code'], 'language': dict_lang['name']})
        except Exception:  # best-effort: service unreachable or unexpected payload
            pass
        return languages

    def detect_cld3(self, content):
        """Return the most frequent language reported by cld3, or None if cld3 reports none."""
        for lang in cld3.get_frequent_languages(content, num_langs=1):
            return lang.language
        return None

    def detect_libretranslate(self, content):
        """Return the first language reported by LibreTranslate, or None on error / empty answer."""
        try:
            language = self.lt.detect(content)
        except Exception:  # TODO ERROR MESSAGE
            return None
        if language:
            return language[0].get('language')
        return None

    def detect(self, content):  # TODO replace by gcld3
        """Detect the language of ``content``.

        cld3 is used for contents of at least 200 characters, LibreTranslate
        for shorter ones.
        """
        if len(content) >= 200:
            return self.detect_cld3(content)
        return self.detect_libretranslate(content)

    def translate(self, content, source=None, target="en"):  # TODO source target
        """Translate ``content`` from ``source`` to ``target``.

        :param content: text to translate
        :param source: source language code; detected from ``content`` when falsy
        :param target: target language code
        :return: the translation, or None when the content is empty, the source
                 language is unknown or equal to ``target``, the service call
                 fails, or the translation is identical to the content.
        """
        if not content:
            return None
        if not source:
            source = self.detect(content)
        if not source or source == target:
            return None
        try:
            translation = self.lt.translate(content, source, target)
        except Exception:  # TODO LOG and display error
            return None
        # An unchanged text carries no information for the caller.
        if translation == content:
            return None
        return translation
| 
 | ||||
| 
 | ||||
| LIST_LANGUAGES = LanguageTranslator().languages() | ||||
| 
 | ||||
| def get_translation_languages(): | ||||
|     return LIST_LANGUAGES | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     t_content = '' | ||||
|     langg = LanguageTranslator() | ||||
|     # lang.translate(t_content, source='ru') | ||||
|     langg.languages() | ||||
|  |  | |||
|  | @ -21,6 +21,7 @@ from lib.objects import Chats | |||
| from lib.objects import ChatSubChannels | ||||
| from lib.objects import ChatThreads | ||||
| from lib.objects import Messages | ||||
| from lib.objects import UsersAccount | ||||
| from lib.objects import Usernames | ||||
| 
 | ||||
| config_loader = ConfigLoader() | ||||
|  | @ -307,7 +308,7 @@ def api_get_chat_service_instance(chat_instance_uuid): | |||
|         return {"status": "error", "reason": "Unknown uuid"}, 404 | ||||
|     return chat_instance.get_meta({'chats'}), 200 | ||||
| 
 | ||||
| def api_get_chat(chat_id, chat_instance_uuid): | ||||
| def api_get_chat(chat_id, chat_instance_uuid, translation_target=None): | ||||
|     chat = Chats.Chat(chat_id, chat_instance_uuid) | ||||
|     if not chat.exists(): | ||||
|         return {"status": "error", "reason": "Unknown chat"}, 404 | ||||
|  | @ -317,7 +318,7 @@ def api_get_chat(chat_id, chat_instance_uuid): | |||
|     if meta['subchannels']: | ||||
|         meta['subchannels'] = get_subchannels_meta_from_global_id(meta['subchannels']) | ||||
|     else: | ||||
|         meta['messages'], meta['tags_messages'] = chat.get_messages() | ||||
|         meta['messages'], meta['tags_messages'] = chat.get_messages(translation_target=translation_target) | ||||
|     return meta, 200 | ||||
| 
 | ||||
| def api_get_nb_message_by_week(chat_id, chat_instance_uuid): | ||||
|  | @ -328,7 +329,7 @@ def api_get_nb_message_by_week(chat_id, chat_instance_uuid): | |||
|     # week = chat.get_nb_message_by_week('20231109') | ||||
|     return week, 200 | ||||
| 
 | ||||
| def api_get_subchannel(chat_id, chat_instance_uuid): | ||||
| def api_get_subchannel(chat_id, chat_instance_uuid, translation_target=None): | ||||
|     subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid) | ||||
|     if not subchannel.exists(): | ||||
|         return {"status": "error", "reason": "Unknown subchannel"}, 404 | ||||
|  | @ -339,17 +340,17 @@ def api_get_subchannel(chat_id, chat_instance_uuid): | |||
|         meta['threads'] = get_threads_metas(meta['threads']) | ||||
|     if meta.get('username'): | ||||
|         meta['username'] = get_username_meta_from_global_id(meta['username']) | ||||
|     meta['messages'], meta['tags_messages'] = subchannel.get_messages() | ||||
|     meta['messages'], meta['tags_messages'] = subchannel.get_messages(translation_target=translation_target) | ||||
|     return meta, 200 | ||||
| 
 | ||||
| def api_get_thread(thread_id, thread_instance_uuid): | ||||
| def api_get_thread(thread_id, thread_instance_uuid, translation_target=None): | ||||
|     thread = ChatThreads.ChatThread(thread_id, thread_instance_uuid) | ||||
|     if not thread.exists(): | ||||
|         return {"status": "error", "reason": "Unknown thread"}, 404 | ||||
|     meta = thread.get_meta({'chat', 'nb_messages'}) | ||||
|     # if meta['chat']: | ||||
|     #     meta['chat'] = get_chat_meta_from_global_id(meta['chat']) | ||||
|     meta['messages'], meta['tags_messages'] = thread.get_messages() | ||||
|     meta['messages'], meta['tags_messages'] = thread.get_messages(translation_target=translation_target) | ||||
|     return meta, 200 | ||||
| 
 | ||||
| def api_get_message(message_id): | ||||
|  | @ -362,6 +363,14 @@ def api_get_message(message_id): | |||
|     #     # meta['chat'] = | ||||
|     return meta, 200 | ||||
| 
 | ||||
def api_get_user_account(user_id, instance_uuid):
    """Return the metadata of a user account.

    :param user_id: first argument given to ``UsersAccount.UserAccount``
    :param instance_uuid: second argument given to ``UsersAccount.UserAccount``
    :return: ``(meta, 200)`` with the 'icon' and 'username' options requested,
             or ``(error dict, 404)`` when the account does not exist.
    """
    user_account = UsersAccount.UserAccount(user_id, instance_uuid)
    if not user_account.exists():
        return {"status": "error", "reason": "Unknown user-account"}, 404
    meta = user_account.get_meta({'icon', 'username'})
    return meta, 200
| 
 | ||||
| # # # # # # # # # # LATER | ||||
| #                 # | ||||
| #   ChatCategory  # | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ sys.path.append(os.environ['AIL_BIN']) | |||
| from lib.ail_core import get_ail_uuid | ||||
| from lib.objects.abstract_object import AbstractObject | ||||
| from lib.ConfigLoader import ConfigLoader | ||||
| from lib import Language | ||||
| from lib.objects import UsersAccount | ||||
| from lib.data_retention_engine import update_obj_date, get_obj_date_first | ||||
| # TODO Set all messages ??? | ||||
|  | @ -76,7 +77,13 @@ class Message(AbstractObject): | |||
|         """ | ||||
|         Returns content | ||||
|         """ | ||||
|         content = self._get_field('content') | ||||
|         global_id = self.get_global_id() | ||||
|         content = r_cache.get(f'content:{global_id}') | ||||
|         if not content: | ||||
|             content = self._get_field('content') | ||||
|             if content: | ||||
|                 r_cache.set(f'content:{global_id}', content) | ||||
|                 r_cache.expire(f'content:{global_id}', 300) | ||||
|         if r_type == 'str': | ||||
|             return content | ||||
|         elif r_type == 'bytes': | ||||
|  | @ -153,11 +160,23 @@ class Message(AbstractObject): | |||
|     # message from channel ??? | ||||
|     # message media | ||||
| 
 | ||||
|     def get_translation(self):  # TODO support multiple translated languages ????? | ||||
|     def get_translation(self, content=None, source=None, target='fr'): | ||||
|         """ | ||||
|         Returns translated content | ||||
|         """ | ||||
|         return self._get_field('translated')  # TODO multiples translation ... -> use set | ||||
|         # return self._get_field('translated') | ||||
|         global_id = self.get_global_id() | ||||
|         translation = r_cache.get(f'translation:{target}:{global_id}') | ||||
|         r_cache.expire(f'translation:{target}:{global_id}', 0) | ||||
|         if translation: | ||||
|             return translation | ||||
|         if not content: | ||||
|             content = self.get_content() | ||||
|         translation = Language.LanguageTranslator().translate(content, source=source, target=target) | ||||
|         if translation: | ||||
|             r_cache.set(f'translation:{target}:{global_id}', translation) | ||||
|             r_cache.expire(f'translation:{target}:{global_id}', 300) | ||||
|         return translation | ||||
| 
 | ||||
|     def _set_translation(self, translation): | ||||
|         """ | ||||
|  | @ -209,7 +228,7 @@ class Message(AbstractObject): | |||
|     #     return r_object.hget(f'meta:item::{self.id}', 'url') | ||||
| 
 | ||||
|     # options: set of optional meta fields | ||||
|     def get_meta(self, options=None, timestamp=None): | ||||
|     def get_meta(self, options=None, timestamp=None, translation_target='en'): | ||||
|         """ | ||||
|         :type options: set | ||||
|         :type timestamp: float | ||||
|  | @ -239,7 +258,7 @@ class Message(AbstractObject): | |||
|                 parent_type, _, parent_id = meta['parent'].split(':', 3) | ||||
|                 if parent_type == 'message': | ||||
|                     message = Message(parent_id) | ||||
|                     meta['reply_to'] = message.get_meta(options=options) | ||||
|                     meta['reply_to'] = message.get_meta(options=options, translation_target=translation_target) | ||||
|         if 'investigations' in options: | ||||
|             meta['investigations'] = self.get_investigations() | ||||
|         if 'link' in options: | ||||
|  | @ -262,6 +281,8 @@ class Message(AbstractObject): | |||
|             meta['files-names'] = self.get_files_names() | ||||
|         if 'reactions' in options: | ||||
|             meta['reactions'] = self.get_reactions() | ||||
|         if 'translation' in options and translation_target: | ||||
|             meta['translation'] = self.get_translation(content=meta.get('content'), target=translation_target) | ||||
| 
 | ||||
|         # meta['encoding'] = None | ||||
|         return meta | ||||
|  |  | |||
|  | @ -179,12 +179,12 @@ class AbstractChatObject(AbstractSubtypeObject, ABC): | |||
|         week_date = Date.get_current_week_day() | ||||
|         return self.get_nb_message_by_week(week_date) | ||||
| 
 | ||||
|     def get_message_meta(self, message, timestamp=None):  # TODO handle file message | ||||
|     def get_message_meta(self, message, timestamp=None, translation_target='en'):  # TODO handle file message | ||||
|         message = Messages.Message(message[9:]) | ||||
|         meta = message.get_meta(options={'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'user-account'}, timestamp=timestamp) | ||||
|         meta = message.get_meta(options={'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, timestamp=timestamp, translation_target=translation_target) | ||||
|         return meta | ||||
| 
 | ||||
|     def get_messages(self, start=0, page=1, nb=500, unread=False):  # threads ???? # TODO ADD last/first message timestamp + return page | ||||
|     def get_messages(self, start=0, page=1, nb=500, unread=False, translation_target='en'):  # threads ???? # TODO ADD last/first message timestamp + return page | ||||
|         # TODO return message meta | ||||
|         tags = {} | ||||
|         messages = {} | ||||
|  | @ -195,7 +195,7 @@ class AbstractChatObject(AbstractSubtypeObject, ABC): | |||
|             if date_day != curr_date: | ||||
|                 messages[date_day] = [] | ||||
|                 curr_date = date_day | ||||
|             mess_dict = self.get_message_meta(message[0], timestamp=timestamp) | ||||
|             mess_dict = self.get_message_meta(message[0], timestamp=timestamp, translation_target=translation_target) | ||||
|             messages[date_day].append(mess_dict) | ||||
| 
 | ||||
|             if mess_dict.get('tags'): | ||||
|  |  | |||
|  | @ -262,6 +262,9 @@ default_har = True | |||
| default_screenshot = True | ||||
| onion_proxy = onion.foundation | ||||
| 
 | ||||
| [Translation] | ||||
| libretranslate =  | ||||
| 
 | ||||
| [IP] | ||||
| # list of comma-separated CIDR that you wish to be alerted for. e.g: | ||||
| #networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24 | ||||
|  |  | |||
|  | @ -21,6 +21,7 @@ sys.path.append(os.environ['AIL_BIN']) | |||
| ################################## | ||||
| from lib import ail_core | ||||
| from lib import chats_viewer | ||||
| from lib import Language | ||||
| from lib import Tag | ||||
| 
 | ||||
| # ============ BLUEPRINT ============ | ||||
|  | @ -80,12 +81,14 @@ def chats_explorer_instance(): | |||
| def chats_explorer_chat(): | ||||
|     chat_id = request.args.get('id') | ||||
|     instance_uuid = request.args.get('uuid') | ||||
|     chat = chats_viewer.api_get_chat(chat_id, instance_uuid) | ||||
|     target = request.args.get('target') | ||||
|     chat = chats_viewer.api_get_chat(chat_id, instance_uuid, translation_target=target) | ||||
|     if chat[1] != 200: | ||||
|         return create_json_response(chat[0], chat[1]) | ||||
|     else: | ||||
|         chat = chat[0] | ||||
|         return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label) | ||||
|         languages = Language.get_translation_languages() | ||||
|         return render_template('chat_viewer.html', chat=chat, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target) | ||||
| 
 | ||||
| @chats_explorer.route("chats/explorer/messages/stats/week", methods=['GET']) | ||||
| @login_required | ||||
|  | @ -105,12 +108,14 @@ def chats_explorer_messages_stats_week(): | |||
| def objects_subchannel_messages(): | ||||
|     subchannel_id = request.args.get('id') | ||||
|     instance_uuid = request.args.get('uuid') | ||||
|     subchannel = chats_viewer.api_get_subchannel(subchannel_id, instance_uuid) | ||||
|     target = request.args.get('target') | ||||
|     subchannel = chats_viewer.api_get_subchannel(subchannel_id, instance_uuid, translation_target=target) | ||||
|     if subchannel[1] != 200: | ||||
|         return create_json_response(subchannel[0], subchannel[1]) | ||||
|     else: | ||||
|         subchannel = subchannel[0] | ||||
|         return render_template('SubChannelMessages.html', subchannel=subchannel, bootstrap_label=bootstrap_label) | ||||
|         languages = Language.get_translation_languages() | ||||
|         return render_template('SubChannelMessages.html', subchannel=subchannel, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target) | ||||
| 
 | ||||
| @chats_explorer.route("/chats/explorer/thread", methods=['GET']) | ||||
| @login_required | ||||
|  | @ -118,12 +123,14 @@ def objects_subchannel_messages(): | |||
| def objects_thread_messages(): | ||||
|     thread_id = request.args.get('id') | ||||
|     instance_uuid = request.args.get('uuid') | ||||
|     thread = chats_viewer.api_get_thread(thread_id, instance_uuid) | ||||
|     target = request.args.get('target') | ||||
|     thread = chats_viewer.api_get_thread(thread_id, instance_uuid, translation_target=target) | ||||
|     if thread[1] != 200: | ||||
|         return create_json_response(thread[0], thread[1]) | ||||
|     else: | ||||
|         meta = thread[0] | ||||
|         return render_template('ThreadMessages.html', meta=meta, bootstrap_label=bootstrap_label) | ||||
|         languages = Language.get_translation_languages() | ||||
|         return render_template('ThreadMessages.html', meta=meta, bootstrap_label=bootstrap_label, translation_languages=languages, translation_target=target) | ||||
| 
 | ||||
| @chats_explorer.route("/objects/message", methods=['GET']) | ||||
| @login_required | ||||
|  | @ -135,5 +142,6 @@ def objects_message(): | |||
|         return create_json_response(message[0], message[1]) | ||||
|     else: | ||||
|         message = message[0] | ||||
|         languages = Language.get_translation_languages() | ||||
|         return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label, | ||||
|                                modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message')) | ||||
|  |  | |||
|  | @ -184,6 +184,9 @@ | |||
|                 <span class="mt-3"> | ||||
|                     {% include 'objects/image/block_blur_img_slider.html' %} | ||||
|                 </span> | ||||
|                 {% with translate_url=url_for('chats_explorer.objects_subchannel_messages', uuid=subchannel['subtype']), obj_id=subchannel['id'] %} | ||||
|                     {% include 'chats_explorer/block_translation.html' %} | ||||
|                 {% endwith %} | ||||
| 
 | ||||
|                 <div class="position-relative"> | ||||
|                     <div class="chat-messages p-2"> | ||||
|  |  | |||
|  | @ -133,6 +133,9 @@ | |||
|                 <span class="mt-3"> | ||||
|                     {% include 'objects/image/block_blur_img_slider.html' %} | ||||
|                 </span> | ||||
|                 {% with translate_url=url_for('chats_explorer.objects_thread_messages', uuid=meta['subtype']), obj_id=meta['id'] %} | ||||
|                     {% include 'chats_explorer/block_translation.html' %} | ||||
|                 {% endwith %} | ||||
| 
 | ||||
|                 <div class="position-relative"> | ||||
|                     <div class="chat-messages p-2"> | ||||
|  |  | |||
|  | @ -48,6 +48,10 @@ | |||
|                     {% endif %} | ||||
|                 </div> | ||||
|                 <pre class="my-0">{{ message['reply_to']['content'] }}</pre> | ||||
|                 {% if message['reply_to']['translation'] %} | ||||
|                     <hr class="m-1"> | ||||
|                     <div class="my-0 text-secondary">{{ message['reply_to']['translation'] }}</div> | ||||
|                 {% endif %} | ||||
|                 {% for tag in message['reply_to']['tags'] %} | ||||
|                     <span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span> | ||||
|                 {%  endfor %} | ||||
|  | @ -71,6 +75,10 @@ | |||
|             {%  endfor %} | ||||
|         {% endif %} | ||||
|         <pre class="my-0">{{ message['content'] }}</pre> | ||||
|         {% if message['translation'] %} | ||||
|             <hr class="m-1"> | ||||
|             <pre class="my-0 text-secondary">{{ message['translation'] }}</pre> | ||||
|         {% endif %} | ||||
|         {% for reaction in message['reactions'] %} | ||||
|             <span class="border rounded px-1">{{ reaction }} {{ message['reactions'][reaction] }}</span> | ||||
|         {%  endfor %} | ||||
|  |  | |||
|  | @ -0,0 +1,37 @@ | |||
| <div class="card border-secondary"> | ||||
|     <div class="card-body py-2"> | ||||
|         <div class="row"> | ||||
|             <div class="col-md-3 text-center px-0"> | ||||
|                 Translation | ||||
|             </div> | ||||
|             <div class="col-md-6 text-center pl-0 pt-1"> | ||||
|                 <select id="translation_selector_target" class="form-select" aria-label="Default select example"> | ||||
|                     {% if not translation_target or translation_target == "Don't Translate" %} | ||||
|                         <option selected>Don't Translate</option> | ||||
|                     {% else %} | ||||
|                         <option selected value="{{ translation_target }}">{{ translation_target }}</option> | ||||
|                     {% endif %} | ||||
|                     {% for language in translation_languages %} | ||||
|                         <option value="{{ language['iso'] }}">{{ language['language'] }}</option> | ||||
|                     {% endfor %} | ||||
|                 </select> | ||||
|             </div> | ||||
|             <div class="col-md-3 text-center"> | ||||
|                 <button class="btn btn-sm btn-primary" onclick="translate_selector();"> | ||||
|                     <i class="fas fa-language"></i> | ||||
|                     <span class="label-icon">Translate</span> | ||||
|                 </button> | ||||
|             </div> | ||||
|         </div> | ||||
|     </div> | ||||
| </div> | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| <script> | ||||
// Reload the current view, asking the server to translate messages into the
// language chosen in the #translation_selector_target <select>.
function translate_selector(){
    var selector = document.getElementById("translation_selector_target");
    var target = selector.value;
    var url = "{{ translate_url }}&id={{ obj_id }}";
    // The "Don't Translate" option has no value attribute, so its value is its
    // label: send no target at all in that case instead of a bogus language.
    if (target && target !== "Don't Translate") {
        // Encode the value so it cannot break or extend the query string.
        url += "&target=" + encodeURIComponent(target);
    }
    window.location.replace(url);
}
| </script> | ||||
|  | @ -159,6 +159,10 @@ | |||
|                         {% include 'objects/image/block_blur_img_slider.html' %} | ||||
|                     </span> | ||||
| 
 | ||||
|                     {% with translate_url=url_for('chats_explorer.chats_explorer_chat', uuid=chat['subtype']), obj_id=chat['id'] %} | ||||
|                         {% include 'chats_explorer/block_translation.html' %} | ||||
|                     {% endwith %} | ||||
| 
 | ||||
|                     <div class="position-relative"> | ||||
|                         <div class="chat-messages p-2"> | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 terrtia
						terrtia