mirror of https://github.com/CIRCL/AIL-framework
chg: [messages] manual translation by language + add new kvrocks lang namespace
parent
50bfd92105
commit
4cf3d628db
|
@ -17,6 +17,7 @@ from lib.ConfigLoader import ConfigLoader
|
||||||
|
|
||||||
config_loader = ConfigLoader()
|
config_loader = ConfigLoader()
|
||||||
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||||
|
r_lang = config_loader.get_db_conn("Kvrocks_Languages")
|
||||||
TRANSLATOR_URL = config_loader.get_config_str('Translation', 'libretranslate')
|
TRANSLATOR_URL = config_loader.get_config_str('Translation', 'libretranslate')
|
||||||
config_loader = None
|
config_loader = None
|
||||||
|
|
||||||
|
@ -256,9 +257,6 @@ def get_iso_from_languages(l_languages, sort=False):
|
||||||
return l_iso
|
return l_iso
|
||||||
|
|
||||||
|
|
||||||
class LanguageDetector:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def get_translator_instance():
|
def get_translator_instance():
|
||||||
return TRANSLATOR_URL
|
return TRANSLATOR_URL
|
||||||
|
|
||||||
|
@ -299,25 +297,98 @@ def _clean_text_to_translate(content, html=False, keys_blocks=True):
|
||||||
content = content.replace(it, '')
|
content = content.replace(it, '')
|
||||||
return content
|
return content
|
||||||
|
|
||||||
#### AIL Objects ####
|
#### LANGUAGE ENGINE ####
|
||||||
|
|
||||||
def get_obj_translation(obj_global_id, content, field='', source=None, target='en'):
|
# first seen
|
||||||
|
# last seen
|
||||||
|
# language by date -> iter on object date ????
|
||||||
|
|
||||||
|
## Langs
|
||||||
|
def get_language_obj_types(language):
    """
    Return the members of the ``languages:<language>`` set.

    add_obj_language() stores ``<obj_type>:<obj_subtype>`` strings in this set,
    one for every type/subtype pair that has at least one object in *language*.
    """
    index_key = f'languages:{language}'
    return r_lang.smembers(index_key)
|
||||||
|
|
||||||
|
def get_language_objs(language, obj_type, obj_subtype=''):
    """
    Return the members of the ``langs:<obj_type>:<obj_subtype>:<language>`` set.

    add_obj_language() stores object global ids
    (``<obj_type>:<obj_subtype>:<obj_id>``) in this set.
    """
    index_key = f'langs:{obj_type}:{obj_subtype}:{language}'
    return r_lang.smembers(index_key)
|
||||||
|
|
||||||
|
# def get_languages_objs(languages, obj_type, obj_subtype='')
|
||||||
|
|
||||||
|
## Objs
|
||||||
|
def get_objs_languages(obj_type, obj_subtype=''):
    """
    Return the languages recorded for a whole object type.

    With a non-empty *obj_subtype* the per-subtype set
    ``objs:lang:<obj_type>:<obj_subtype>`` is read; otherwise the per-type set
    ``objs:langs:<obj_type>`` is read.
    """
    index_key = f'objs:lang:{obj_type}:{obj_subtype}' if obj_subtype else f'objs:langs:{obj_type}'
    return r_lang.smembers(index_key)
|
||||||
|
|
||||||
|
## Obj
|
||||||
|
def get_obj_languages(obj_type, obj_subtype, obj_id):
    """
    Return the languages recorded for one object.

    Reads the ``obj:lang:<obj_type>:<obj_subtype>:<obj_id>`` set.

    :param obj_type: object type
    :param obj_subtype: object subtype; any falsy value is treated as ''
    :param obj_id: object id
    """
    # add_obj_language() / remove_obj_language() normalise a missing subtype to ''
    # before building the key; do the same here so a None subtype reads the key
    # that was actually written instead of '...:None:...'.
    if not obj_subtype:
        obj_subtype = ''
    return r_lang.smembers(f'obj:lang:{obj_type}:{obj_subtype}:{obj_id}')
|
||||||
|
|
||||||
|
# TODO ADD language to CHAT GLOBAL SET
|
||||||
|
def add_obj_language(language, obj_type, obj_subtype, obj_id):  # (s)
    """
    Record *language* for an object and update every language index.

    A falsy *obj_subtype* is stored as ''. Five sets are updated:
      - objs:langs:<type>                    -> languages seen for the type
      - objs:lang:<type>:<subtype>           -> languages seen for the subtype
      - obj:lang:<type>:<subtype>:<id>       -> languages of this object
      - languages:<language>                 -> '<type>:<subtype>' pairs using the language
      - langs:<type>:<subtype>:<language>    -> global ids of objects in the language
    """
    subtype = obj_subtype or ''
    type_subtype = f'{obj_type}:{subtype}'
    global_id = f'{type_subtype}:{obj_id}'

    r_lang.sadd(f'objs:langs:{obj_type}', language)
    r_lang.sadd(f'objs:lang:{type_subtype}', language)
    r_lang.sadd(f'obj:lang:{global_id}', language)

    # NOTE: the original author flagged this index for possible removal.
    r_lang.sadd(f'languages:{language}', type_subtype)
    r_lang.sadd(f'langs:{type_subtype}:{language}', global_id)
|
||||||
|
|
||||||
|
def remove_obj_language(language, obj_type, obj_subtype, obj_id):
    """
    Remove *language* from an object and clean up the language indexes.

    Mirrors add_obj_language(): the object is removed from
    ``obj:lang:<global_id>`` and ``langs:<type>:<subtype>:<language>``. When it
    was the last object of that type/subtype in *language*, the language is also
    dropped from the subtype index and, if no other subtype of *obj_type* still
    uses it, from the type index.

    :param language: language code to remove
    :param obj_type: object type
    :param obj_subtype: object subtype; any falsy value is treated as ''
    :param obj_id: object id
    """
    if not obj_subtype:
        obj_subtype = ''
    type_subtype = f'{obj_type}:{obj_subtype}'
    obj_global_id = f'{type_subtype}:{obj_id}'

    r_lang.srem(f'obj:lang:{obj_global_id}', language)
    r_lang.srem(f'langs:{type_subtype}:{language}', obj_global_id)

    # Other objects of this type/subtype still use the language: indexes stay.
    if r_lang.exists(f'langs:{type_subtype}:{language}'):
        return

    r_lang.srem(f'objs:lang:{type_subtype}', language)
    r_lang.srem(f'languages:{language}', type_subtype)

    # The previous code called scard() with an extra argument (TypeError) and
    # compared the number of languages of the type, which says nothing about
    # whether another subtype still uses *this* language. The
    # 'languages:<language>' index lists every '<type>:<subtype>' pair that
    # still does, so check it for a remaining pair of the same type.
    type_prefix = f'{obj_type}:'
    for member in r_lang.smembers(f'languages:{language}'):
        if isinstance(member, bytes):  # connection without decode_responses
            member = member.decode()
        if member.startswith(type_prefix):
            return
    r_lang.srem(f'objs:langs:{obj_type}', language)
|
||||||
|
|
||||||
|
def edit_obj_language(language, obj_type, obj_subtype, obj_id):
    """
    Remove and then re-add *language* for the given object.

    NOTE(review): the same language is removed and added back, so this only
    re-creates the index entries; replacing one language by another would need
    a second language argument - confirm the intent.
    """
    target = (language, obj_type, obj_subtype, obj_id)
    remove_obj_language(*target)
    add_obj_language(*target)
|
||||||
|
|
||||||
|
|
||||||
|
## Translation
|
||||||
|
def _get_obj_translation(obj_global_id, language, field=''):
    """
    Return the translation stored for *language* in the
    ``tr:<obj_global_id>:<field>`` hash (the hash written by
    set_obj_translation()).
    """
    hash_key = f'tr:{obj_global_id}:{field}'
    return r_lang.hget(hash_key, language)
|
||||||
|
|
||||||
|
def get_obj_translation(obj_global_id, language, source=None, content=None, field=''):
    """
    Returns translated content

    Lookup order:
      1. the short-lived cache entry ``translation:<language>:<global_id>:<field>``
      2. the translation stored in the language database (_get_obj_translation)
      3. a machine translation of *content*, when content is provided

    :param obj_global_id: global id of the object
    :param language: target language
    :param source: source language passed to the translator (None = not specified)
    :param content: text to translate if no stored translation exists
    :param field: object field the translation belongs to
    :return: the translation, or a falsy value when none is available
    """
    cache_key = f'translation:{language}:{obj_global_id}:{field}'
    translation = r_cache.get(cache_key)
    if translation:
        return translation

    # TODO HANDLE FIELDS TRANSLATION
    translation = _get_obj_translation(obj_global_id, language, field=field)
    # Callers that only want a stored translation pass no content: there is
    # nothing to send to the translator in that case.
    if not translation and content:
        translation = LanguageTranslator().translate(content, source=source, target=language)

    if translation:
        # Value and TTL are written in one command, so a failure between SET
        # and EXPIRE can no longer leave an entry that never expires.
        r_cache.set(cache_key, translation, ex=300)
    return translation
|
||||||
|
|
||||||
## --AIL Objects-- ##
|
|
||||||
|
# TODO Force to edit ????
|
||||||
|
def set_obj_translation(obj_global_id, language, translation, field=''):
    """
    Store a translation for an object and drop the matching cache entry.

    :param obj_global_id: global id of the object
    :param language: language of the translation
    :param translation: translated text to store
    :param field: object field the translation belongs to
    :return: result of the HSET on ``tr:<obj_global_id>:<field>``
    """
    # The cache key written by get_obj_translation() ends with the field name;
    # it must be part of the key here too, otherwise the cached translation of
    # a non-empty field keeps being served until it expires.
    r_cache.delete(f'translation:{language}:{obj_global_id}:{field}')
    return r_lang.hset(f'tr:{obj_global_id}:{field}', language, translation)
|
||||||
|
|
||||||
|
|
||||||
|
## --LANGUAGE ENGINE-- ##
|
||||||
|
|
||||||
|
|
||||||
|
#### AIL Objects ####
|
||||||
|
|
||||||
class LanguagesDetector:
|
class LanguagesDetector:
|
||||||
|
|
||||||
|
|
|
@ -400,6 +400,19 @@ def api_get_message(message_id, translation_target=None):
|
||||||
meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target)
|
meta = message.get_meta({'chat', 'content', 'files-names', 'icon', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}, translation_target=translation_target)
|
||||||
return meta, 200
|
return meta, 200
|
||||||
|
|
||||||
|
def api_manually_translate_message(message_id, translation_target, translation):
    """
    Store a manually written translation for a message.

    :param message_id: id of the message to translate
    :param translation_target: target language; must be one of
        Language.get_translation_languages()
    :param translation: translated text; nothing is stored when it is empty or None
    :return: ``(error_dict, http_status)`` on failure, ``(None, 200)`` otherwise
    """
    message = Messages.Message(message_id)
    if not message.exists():
        return {"status": "error", "reason": "Unknown uuid"}, 404
    # translation is None when the form field is missing: only measure real text.
    if translation and len(translation) > 200000:  # TODO REVIEW LIMIT
        return {"status": "error", "reason": "Max Size reached"}, 400
    if translation_target not in Language.get_translation_languages():
        return {"status": "error", "reason": "Unknown Language"}, 400
    if translation:
        message.set_translation(translation_target, translation)
    # TODO SANITIZE translation
    return None, 200
|
||||||
|
|
||||||
def api_get_user_account(user_id, instance_uuid, translation_target=None):
|
def api_get_user_account(user_id, instance_uuid, translation_target=None):
|
||||||
user_account = UsersAccount.UserAccount(user_id, instance_uuid)
|
user_account = UsersAccount.UserAccount(user_id, instance_uuid)
|
||||||
if not user_account.exists():
|
if not user_account.exists():
|
||||||
|
|
|
@ -25,7 +25,7 @@ from lib import Duplicate
|
||||||
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation
|
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation
|
||||||
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
|
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
|
||||||
from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship
|
from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship
|
||||||
from lib.Language import get_obj_translation
|
from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, get_obj_translation, set_obj_translation
|
||||||
from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers
|
from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers
|
||||||
|
|
||||||
logging.config.dictConfig(ail_logger.get_config(name='ail'))
|
logging.config.dictConfig(ail_logger.get_config(name='ail'))
|
||||||
|
@ -302,15 +302,31 @@ class AbstractObject(ABC):
|
||||||
|
|
||||||
## -Relationship- ##
|
## -Relationship- ##
|
||||||
|
|
||||||
## Translation ##
|
## Language ##
|
||||||
|
|
||||||
|
def get_languages(self):
    """Return the languages recorded for this object (delegates to get_obj_languages)."""
    subtype = self.get_subtype(r_str=True)
    return get_obj_languages(self.type, subtype, self.id)
|
||||||
|
|
||||||
|
def add_language(self, language):
    """Record *language* for this object (delegates to add_obj_language)."""
    subtype = self.get_subtype(r_str=True)
    return add_obj_language(language, self.type, subtype, self.id)
|
||||||
|
|
||||||
|
def remove_language(self, language):
    """Remove *language* from this object (delegates to remove_obj_language)."""
    subtype = self.get_subtype(r_str=True)
    return remove_obj_language(language, self.type, subtype, self.id)
|
||||||
|
|
||||||
|
def get_translation(self, language, field=''):
    """
    Return this object's translation in *language* for *field*.

    No content is handed to get_obj_translation() by this method.
    """
    global_id = self.get_global_id()
    return get_obj_translation(global_id, language, field=field)
|
||||||
|
|
||||||
|
def set_translation(self, language, translation, field=''):
    """Store *translation* as this object's translation in *language* for *field*."""
    global_id = self.get_global_id()
    return set_obj_translation(global_id, language, translation, field=field)
|
||||||
|
|
||||||
def translate(self, content=None, field='', source=None, target='en'):
    """
    Translate this object into *target*.

    When *content* is empty the object's own content (self.get_content()) is
    used. The lookup/translation itself is done by get_obj_translation().
    """
    text = content or self.get_content()
    return get_obj_translation(self.get_global_id(), target, source=source, content=text, field=field)
|
||||||
|
|
||||||
## -Translation- ##
|
## -Language- ##
|
||||||
|
|
||||||
## Parent ##
|
## Parent ##
|
||||||
|
|
||||||
|
|
|
@ -903,6 +903,7 @@ namespace.cor ail_correls
|
||||||
namespace.crawl ail_crawlers
|
namespace.crawl ail_crawlers
|
||||||
namespace.db ail_datas
|
namespace.db ail_datas
|
||||||
namespace.dup ail_dups
|
namespace.dup ail_dups
|
||||||
|
namespace.lg ail_langs
|
||||||
namespace.obj ail_objs
|
namespace.obj ail_objs
|
||||||
namespace.rel ail_rels
|
namespace.rel ail_rels
|
||||||
namespace.stat ail_stats
|
namespace.stat ail_stats
|
||||||
|
|
|
@ -191,6 +191,11 @@ host = localhost
|
||||||
port = 6383
|
port = 6383
|
||||||
password = ail_crawlers
|
password = ail_crawlers
|
||||||
|
|
||||||
|
[Kvrocks_Languages]
|
||||||
|
host = localhost
|
||||||
|
port = 6383
|
||||||
|
password = ail_langs
|
||||||
|
|
||||||
[Kvrocks_Objects]
|
[Kvrocks_Objects]
|
||||||
host = localhost
|
host = localhost
|
||||||
port = 6383
|
port = 6383
|
||||||
|
|
|
@ -221,6 +221,21 @@ def objects_message():
|
||||||
translation_languages=languages, translation_target=target,
|
translation_languages=languages, translation_target=target,
|
||||||
modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))
|
modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))
|
||||||
|
|
||||||
|
# NOTE(review): this POST endpoint stores data but is guarded by
# @login_read_only like the GET views - confirm the intended access level.
@chats_explorer.route("/objects/message/translate", methods=['POST'])
@login_required
@login_read_only
def objects_message_translate():
    """
    Save a manual translation submitted from the message view.

    Reads the ``id``, ``target`` and ``translation`` form fields. On success
    the browser is redirected to the message page; otherwise the API error is
    returned as JSON.
    """
    form = request.form
    message_id = form.get('id')
    target = form.get('target')
    translation = form.get('translation')
    if target == "Don't Translate":
        target = None

    payload, status = chats_viewer.api_manually_translate_message(message_id, target, translation)
    if status == 200:
        return redirect(url_for('chats_explorer.objects_message', id=message_id, target=target))
    return create_json_response(payload, status)
|
||||||
|
|
||||||
@chats_explorer.route("/objects/user-account", methods=['GET'])
|
@chats_explorer.route("/objects/user-account", methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
@login_read_only
|
@login_read_only
|
||||||
|
|
|
@ -80,6 +80,25 @@
|
||||||
{% if message['translation'] %}
|
{% if message['translation'] %}
|
||||||
<hr class="m-1">
|
<hr class="m-1">
|
||||||
<pre class="my-0 text-secondary">{{ message['translation'] }}</pre>
|
<pre class="my-0 text-secondary">{{ message['translation'] }}</pre>
|
||||||
|
|
||||||
|
{# Manual translation form. Element ids carry the escaped message id, like the
   collapse id, so they stay unique if this snippet is rendered for several messages. #}
{% set mess_id_escape= message['id'] | replace("/", "_") %}
<button class="btn btn-light p-0" type="button" data-toggle="collapse" data-target="#collapseTrans{{ mess_id_escape }}" aria-expanded="false" aria-controls="collapseTrans{{ mess_id_escape }}">
  <i class="fas fa-language"></i>
</button>
<div class="collapse" id="collapseTrans{{ mess_id_escape }}">
  <div class="card card-body">
    <form method="post" action="{{ url_for('chats_explorer.objects_message_translate') }}" target="_blank">
      <input type="text" id="id{{ mess_id_escape }}" name="id" value="{{message['id']}}" hidden>
      <input type="text" id="target{{ mess_id_escape }}" name="target" value="{{translation_target}}" hidden>
      <span>{{translation_target}}:</span>
      <textarea class="form-control" id="translation{{ mess_id_escape }}" name="translation">{{ message['translation'] }}</textarea>
      <button class="btn btn-primary" type="submit">
        {# label kept outside the icon element so it is not rendered with the icon font #}
        <i class="fas fa-pen-alt"></i> Manual Translation
      </button>
    </form>
  </div>
</div>
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% for reaction in message['reactions'] %}
|
{% for reaction in message['reactions'] %}
|
||||||
<span class="border rounded px-1">{{ reaction }} {{ message['reactions'][reaction] }}</span>
|
<span class="border rounded px-1">{{ reaction }} {{ message['reactions'][reaction] }}</span>
|
||||||
|
|
Loading…
Reference in New Issue