mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			chg: [language messages] add nb languages stats by chat/subchannel objects
							parent
							
								
									59ca8c5d31
								
							
						
					
					
						commit
						b9c37167ad
					
				|  | @ -324,24 +324,32 @@ def get_objs_languages(obj_type, obj_subtype=''): | |||
| def get_obj_languages(obj_type, obj_subtype, obj_id): | ||||
|     return r_lang.smembers(f'obj:lang:{obj_type}:{obj_subtype}:{obj_id}') | ||||
| 
 | ||||
| def get_obj_language_stats(obj_type, obj_subtype, obj_id): | ||||
|     return r_lang.zrange(f'obj:langs:stat:{obj_type}:{obj_subtype}:{obj_id}', 0, -1, withscores=True) | ||||
| 
 | ||||
| # TODO ADD language to CHAT GLOBAL SET | ||||
| def add_obj_language(language, obj_type, obj_subtype, obj_id):  # (s) | ||||
| def add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()):  # (s) | ||||
|     if not obj_subtype: | ||||
|         obj_subtype = '' | ||||
|     obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}' | ||||
| 
 | ||||
|     r_lang.sadd(f'objs:langs:{obj_type}', language) | ||||
|     r_lang.sadd(f'objs:lang:{obj_type}:{obj_subtype}', language) | ||||
|     r_lang.sadd(f'obj:lang:{obj_global_id}', language) | ||||
|     new = r_lang.sadd(f'obj:lang:{obj_global_id}', language) | ||||
| 
 | ||||
|     r_lang.sadd(f'languages:{language}', f'{obj_type}:{obj_subtype}') ################### REMOVE ME ??? | ||||
|     r_lang.sadd(f'langs:{obj_type}:{obj_subtype}:{language}', obj_global_id) | ||||
| 
 | ||||
| def remove_obj_language(language, obj_type, obj_subtype, obj_id): | ||||
|     if new: | ||||
|         for global_id in objs_containers: | ||||
|             r_lang.zincrby(f'obj:langs:stat:{global_id}', 1, language) | ||||
| 
 | ||||
| 
 | ||||
| def remove_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()): | ||||
|     if not obj_subtype: | ||||
|         obj_subtype = '' | ||||
|     obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}' | ||||
|     r_lang.srem(f'obj:lang:{obj_global_id}', language) | ||||
|     rem = r_lang.srem(f'obj:lang:{obj_global_id}', language) | ||||
| 
 | ||||
|     delete_obj_translation(obj_global_id, language) | ||||
| 
 | ||||
|  | @ -353,27 +361,33 @@ def remove_obj_language(language, obj_type, obj_subtype, obj_id): | |||
|             if r_lang.scard(f'objs:langs:{obj_type}') <= 1: | ||||
|                 r_lang.srem(f'objs:langs:{obj_type}', language) | ||||
| 
 | ||||
|     if rem: | ||||
|         for global_id in objs_containers: | ||||
|             r = r_lang.zincrby(f'obj:langs:stat:{global_id}', -1, language) | ||||
|             if r < 1: | ||||
|                 r_lang.zrem(f'obj:langs:stat:{global_id}', language) | ||||
| 
 | ||||
| # TODO handle fields | ||||
| def detect_obj_language(obj_type, obj_subtype, obj_id, content): | ||||
| def detect_obj_language(obj_type, obj_subtype, obj_id, content, objs_containers=set()): | ||||
|     detector = LanguagesDetector(nb_langs=1) | ||||
|     language = detector.detect(content) | ||||
|     if language: | ||||
|         language = language[0] | ||||
|         previous_lang = get_obj_languages(obj_type, obj_subtype, obj_id) | ||||
|         if previous_lang: | ||||
|             previous_lang = previous_lang[0] | ||||
|             previous_lang = previous_lang.pop() | ||||
|             if language != previous_lang: | ||||
|                 remove_obj_language(language, obj_type, obj_subtype, obj_id) | ||||
|                 add_obj_language(language, obj_type, obj_subtype, obj_id) | ||||
|                 remove_obj_language(previous_lang, obj_type, obj_subtype, obj_id, objs_containers=objs_containers) | ||||
|                 add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers) | ||||
|         else: | ||||
|             add_obj_language(language, obj_type, obj_subtype, obj_id) | ||||
|             add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=objs_containers) | ||||
|         return language | ||||
| 
 | ||||
| ## Translation | ||||
| def _get_obj_translation(obj_global_id, language, field=''): | ||||
| def r_get_obj_translation(obj_global_id, language, field=''): | ||||
|     return r_lang.hget(f'tr:{obj_global_id}:{field}', language) | ||||
| 
 | ||||
| def get_obj_translation(obj_global_id, language, source=None, content=None, field=''): | ||||
| def _get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()): | ||||
|     """ | ||||
|         Returns translated content | ||||
|     """ | ||||
|  | @ -385,17 +399,20 @@ def get_obj_translation(obj_global_id, language, source=None, content=None, fiel | |||
|         # r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 0) | ||||
|         return translation | ||||
|     # TODO HANDLE FIELDS TRANSLATION | ||||
|     translation = _get_obj_translation(obj_global_id, language, field=field) | ||||
|     translation = r_get_obj_translation(obj_global_id, language, field=field) | ||||
|     if not translation: | ||||
|         source, translation = LanguageTranslator().translate(content, source=source, target=language) | ||||
|         if source and translation: | ||||
|             obj_type, subtype, obj_id = obj_global_id.split(':', 2) | ||||
|             add_obj_language(source, obj_type, subtype, obj_id) | ||||
|             add_obj_language(source, obj_type, subtype, obj_id, objs_containers=objs_containers) | ||||
|     if translation: | ||||
|         r_cache.set(f'translation:{language}:{obj_global_id}:{field}', translation) | ||||
|         r_cache.expire(f'translation:{language}:{obj_global_id}:{field}', 300) | ||||
|     return translation | ||||
| 
 | ||||
| def get_obj_translation(obj_global_id, language, source=None, content=None, field='', objs_containers=set()): | ||||
|     return _get_obj_translation(obj_global_id, language, source=source, content=content, field=field, objs_containers=objs_containers) | ||||
| 
 | ||||
| 
 | ||||
| # TODO Force to edit ???? | ||||
| 
 | ||||
|  |  | |||
|  | @ -480,7 +480,6 @@ def api_manually_translate_message(message_id, source, translation_target, trans | |||
|             return {"status": "error", "reason": "Max Size reached"}, 400 | ||||
|     all_languages = Language.get_translation_languages() | ||||
|     if source not in all_languages: | ||||
|         print(source) | ||||
|         return {"status": "error", "reason": "Unknown source Language"}, 400 | ||||
|     message_language = message.get_language() | ||||
|     if message_language != source: | ||||
|  |  | |||
|  | @ -103,8 +103,17 @@ class Message(AbstractObject): | |||
|         return message_id | ||||
| 
 | ||||
|     def get_chat_id(self):  # TODO optimize -> use me to tag Chat | ||||
|         chat_id = self.get_basename().rsplit('_', 1)[0] | ||||
|         return chat_id | ||||
|         c_id = self.id.split('/') | ||||
|         return c_id[2] | ||||
| 
 | ||||
|     def get_chat(self): | ||||
|         c_id = self.id.split('/') | ||||
|         return f'chat:{c_id[0]}:{c_id[2]}' | ||||
| 
 | ||||
|     def get_subchannel(self): | ||||
|         subchannel = self.get_correlation('chat-subchannel') | ||||
|         if subchannel.get('chat-subchannel'): | ||||
|             return f'user-account:{subchannel["chat-subchannel"].pop()}' | ||||
| 
 | ||||
|     def get_thread(self): | ||||
|         for child in self.get_childrens(): | ||||
|  | @ -183,25 +192,6 @@ class Message(AbstractObject): | |||
|         else: | ||||
|             return None | ||||
| 
 | ||||
|     def get_translation(self, content=None, source=None, target='fr'): | ||||
|         """ | ||||
|         Returns translated content | ||||
|         """ | ||||
| 
 | ||||
|         # return self._get_field('translated') | ||||
|         global_id = self.get_global_id() | ||||
|         translation = r_cache.get(f'translation:{target}:{global_id}') | ||||
|         r_cache.expire(f'translation:{target}:{global_id}', 0) | ||||
|         if translation: | ||||
|             return translation | ||||
|         if not content: | ||||
|             content = self.get_content() | ||||
|         translation = Language.LanguageTranslator().translate(content, source=source, target=target) | ||||
|         if translation: | ||||
|             r_cache.set(f'translation:{target}:{global_id}', translation) | ||||
|             r_cache.expire(f'translation:{target}:{global_id}', 300) | ||||
|         return translation | ||||
| 
 | ||||
|     def _set_translation(self, translation): | ||||
|         """ | ||||
|         Set translated content | ||||
|  | @ -305,6 +295,8 @@ class Message(AbstractObject): | |||
|             else: | ||||
|                 source = None | ||||
|             meta['translation'] = self.translate(content=meta.get('content'), source=source, target=translation_target) | ||||
|             if 'language' in options: | ||||
|                 meta['language'] = self.get_language() | ||||
| 
 | ||||
|         # meta['encoding'] = None | ||||
|         return meta | ||||
|  | @ -318,11 +310,29 @@ class Message(AbstractObject): | |||
|     #     self._set_translation(translated) | ||||
|     #     return translated | ||||
| 
 | ||||
|     def create(self, content, translation=None, tags=[]): | ||||
|     ## Language ## | ||||
| 
 | ||||
|     def get_objs_container(self): | ||||
|         objs_containers = set() | ||||
|         # chat | ||||
|         objs_containers.add(self.get_chat()) | ||||
|         subchannel = self.get_subchannel() | ||||
|         if subchannel: | ||||
|             objs_containers.add(subchannel) | ||||
|         # thread = self.get_thread() # TODO Get current thread | ||||
|         # if thread: | ||||
|         #     objs_containers.add(thread) | ||||
|         return objs_containers | ||||
| 
 | ||||
|     #- Language -# | ||||
| 
 | ||||
|     def create(self, content, language=None, translation=None, tags=[]): | ||||
|         self._set_field('content', content) | ||||
|         # r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content) | ||||
|         if translation: | ||||
|         if not language and content: | ||||
|             language = self.detect_language() | ||||
|         if translation and content: | ||||
|             self._set_translation(translation) | ||||
|             self.set_translation(language, translation) | ||||
|         for tag in tags: | ||||
|             self.add_tag(tag) | ||||
| 
 | ||||
|  |  | |||
|  | @ -25,7 +25,7 @@ from lib import Duplicate | |||
| from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation | ||||
| from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations | ||||
| from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship | ||||
| from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, detect_obj_language, get_obj_translation, set_obj_translation, delete_obj_translation | ||||
| from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, detect_obj_language, get_obj_language_stats, get_obj_translation, set_obj_translation, delete_obj_translation | ||||
| from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers | ||||
| 
 | ||||
| logging.config.dictConfig(ail_logger.get_config(name='ail')) | ||||
|  | @ -302,26 +302,33 @@ class AbstractObject(ABC): | |||
| 
 | ||||
|     ## -Relationship- ## | ||||
| 
 | ||||
|     def get_objs_container(self): | ||||
|         return set() | ||||
| 
 | ||||
|     ## Language ## | ||||
| 
 | ||||
|     def get_languages(self): | ||||
|         return get_obj_languages(self.type, self.get_subtype(r_str=True), self.id) | ||||
| 
 | ||||
|     def add_language(self, language): | ||||
|         return add_obj_language(language, self.type, self.get_subtype(r_str=True), self.id) | ||||
|         return add_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container()) | ||||
| 
 | ||||
|     def remove_language(self, language): | ||||
|         return remove_obj_language(language, self.type, self.get_subtype(r_str=True), self.id) | ||||
|         return remove_obj_language(language, self.type, self.get_subtype(r_str=True), self.id, objs_containers=self.get_objs_container()) | ||||
| 
 | ||||
|     def edit_language(self, old_language, new_language): | ||||
|         self.remove_language(old_language) | ||||
|         if old_language: | ||||
|             self.remove_language(old_language) | ||||
|         self.add_language(new_language) | ||||
| 
 | ||||
|     def detect_language(self, field=''): | ||||
|         return detect_obj_language(self.type, self.get_subtype(r_str=True), self.id, self.get_content()) | ||||
|         return detect_obj_language(self.type, self.get_subtype(r_str=True), self.id, self.get_content(), objs_containers=self.get_objs_container()) | ||||
| 
 | ||||
|     def get_obj_language_stats(self): | ||||
|         return get_obj_language_stats(self.type, self.get_subtype(r_str=True), self.id) | ||||
| 
 | ||||
|     def get_translation(self, language, field=''): | ||||
|         return get_obj_translation(self.get_global_id(), language, field=field) | ||||
|         return get_obj_translation(self.get_global_id(), language, field=field, objs_containers=self.get_objs_container()) | ||||
| 
 | ||||
|     def set_translation(self, language, translation, field=''): | ||||
|         return set_obj_translation(self.get_global_id(), language, translation, field=field) | ||||
|  | @ -333,7 +340,7 @@ class AbstractObject(ABC): | |||
|         global_id = self.get_global_id() | ||||
|         if not content: | ||||
|             content = self.get_content() | ||||
|         translation = get_obj_translation(global_id, target, source=source, content=content, field=field) | ||||
|         translation = get_obj_translation(global_id, target, source=source, content=content, field=field, objs_containers=self.get_objs_container()) | ||||
|         return translation | ||||
| 
 | ||||
|     ## -Language- ## | ||||
|  |  | |||
|  | @ -253,7 +253,10 @@ def objects_message_translate(): | |||
|     if resp[1] != 200: | ||||
|         return create_json_response(resp[0], resp[1]) | ||||
|     else: | ||||
|         return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target)) | ||||
|         if request.referrer: | ||||
|             return redirect(request.referrer) | ||||
|         else: | ||||
|             return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target)) | ||||
| 
 | ||||
| @chats_explorer.route("/objects/message/detect/language", methods=['GET']) | ||||
| @login_required | ||||
|  | @ -265,7 +268,10 @@ def objects_message_detect_language(): | |||
|     if resp[1] != 200: | ||||
|         return create_json_response(resp[0], resp[1]) | ||||
|     else: | ||||
|         return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target)) | ||||
|         if request.referrer: | ||||
|             return redirect(request.referrer) | ||||
|         else: | ||||
|             return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target)) | ||||
| 
 | ||||
| @chats_explorer.route("/objects/user-account", methods=['GET']) | ||||
| @login_required | ||||
|  |  | |||
|  | @ -102,7 +102,7 @@ | |||
|             </span> | ||||
|             <div class="collapse" id="collapseTrans{{ mess_id_escape }}"> | ||||
|                 <div class="card card-body"> | ||||
|                     <form method="post" action="{{ url_for('chats_explorer.objects_message_translate') }}" target="_blank"> | ||||
|                     <form method="post" action="{{ url_for('chats_explorer.objects_message_translate') }}"> | ||||
|                         <input type="text" id="id" name="id" value="{{message['id']}}" hidden> | ||||
|                         <span class="badge badge-primary">Source:</span> | ||||
|                         <span class=""> | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 terrtia
						terrtia