chg: [tags] search ocrs and images by tags + fix ocr, filter invalid image

pull/607/head
terrtia 2024-04-26 15:50:58 +02:00
parent 2b23d993df
commit 31b519cc17
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
4 changed files with 57 additions and 2 deletions

View File

@ -101,7 +101,12 @@ class OcrExtractor(AbstractModule):
languages = get_model_languages(image) languages = get_model_languages(image)
languages = Ocrs.sanityze_ocr_languages(languages, ocr_languages=self.ocr_languages) languages = Ocrs.sanityze_ocr_languages(languages, ocr_languages=self.ocr_languages)
print(image.id, languages) print(image.id, languages)
try:
texts = Ocrs.extract_text(path, languages) texts = Ocrs.extract_text(path, languages)
except ValueError as e:
self.logger.warning(e)
self.obj.add_tag('infoleak:confirmed="false-positive"')
texts = None
if texts: if texts:
print('create') print('create')
ocr = Ocrs.create(image.id, texts) ocr = Ocrs.create(image.id, texts)

View File

@ -293,6 +293,24 @@ def tags_search_messages():
dict_tagged['date'] = Date.sanitise_date_range('', '', separator='-') dict_tagged['date'] = Date.sanitise_date_range('', '', separator='-')
return render_template("tags/search_obj_by_tags.html", bootstrap_label=bootstrap_label, dict_tagged=dict_tagged) return render_template("tags/search_obj_by_tags.html", bootstrap_label=bootstrap_label, dict_tagged=dict_tagged)
@tags_ui.route('/tag/search/image')
@login_required
@login_read_only
def tags_search_images():
    """Render the search-by-tags page configured for image objects."""
    obj_type = 'image'
    search_context = {
        "object_type": obj_type,
        # Display name shown by the template: capitalised type plus plural "s".
        "object_name": obj_type.title() + "s",
        # Empty from/to bounds are handed to the sanitiser; presumably it
        # falls back to a default range -- confirm in Date.sanitise_date_range.
        'date': Date.sanitise_date_range('', '', separator='-'),
    }
    return render_template(
        "tags/search_obj_by_tags.html",
        bootstrap_label=bootstrap_label,
        dict_tagged=search_context,
    )
@tags_ui.route('/tag/search/ocr')
@login_required
@login_read_only
def tags_search_ocrs():
    """Render the search-by-tags page configured for OCR objects."""
    obj_type = 'ocr'
    search_context = {
        "object_type": obj_type,
        # Display name shown by the template: capitalised type plus plural "s".
        "object_name": obj_type.title() + "s",
        # Empty from/to bounds are handed to the sanitiser; presumably it
        # falls back to a default range -- confirm in Date.sanitise_date_range.
        'date': Date.sanitise_date_range('', '', separator='-'),
    }
    return render_template(
        "tags/search_obj_by_tags.html",
        bootstrap_label=bootstrap_label,
        dict_tagged=search_context,
    )
@tags_ui.route('/tag/search/domain') @tags_ui.route('/tag/search/domain')
@login_required @login_required
@login_read_only @login_read_only

View File

@ -22,6 +22,18 @@
Search Messages by Tags Search Messages by Tags
</a> </a>
</li> </li>
{# Navigation entry linking to the image search-by-tags view (tags_ui.tags_search_images). #}
<li class="nav-item">
<a class="nav-link" href="{{ url_for('tags_ui.tags_search_images') }}" id="nav_tags_search_image">
<i class="fas fa-image"></i>
Search Images by Tags
</a>
</li>
{# Navigation entry linking to the OCR search-by-tags view (tags_ui.tags_search_ocrs).
   The id follows the nav_tags_search_<object_type> pattern of the neighbouring entries;
   it was previously "nav_tags_search_message", a copy-paste slip that did not match the
   "ocr" object type. NOTE(review): confirm no script still targets the old id for this link. #}
<li class="nav-item">
<a class="nav-link" href="{{ url_for('tags_ui.tags_search_ocrs') }}" id="nav_tags_search_ocr">
<i class="fas fa-expand"></i>
Search Ocrs by Tags
</a>
</li>
<li class="nav-item"> <li class="nav-item">
<a class="nav-link" href="{{ url_for('tags_ui.tags_search_domains') }}" id="nav_tags_search_domain"> <a class="nav-link" href="{{ url_for('tags_ui.tags_search_domains') }}" id="nav_tags_search_domain">
<i class="fab fa-html5"></i> <i class="fab fa-html5"></i>

View File

@ -126,6 +126,26 @@
</td> </td>
</tr> </tr>
{% endfor %} {% endfor %}
{%elif dict_tagged["object_type"]=="screenshot"%}
{%elif dict_tagged["object_type"]=="image" or dict_tagged["object_type"]=="ocr"%}
{# One table row per tagged image/ocr object: the object id links to the correlation view
   (type and id passed as query parameters), and each tag is rendered as a badge linking to
   a tag search for the same object type. Badge colour cycles through bootstrap_label by
   loop index modulo 5. #}
{% for dict_obj in dict_tagged["tagged_obj"] %}
<tr>
<td class="pb-0">
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type={{dict_tagged['object_type']}}&id={{dict_obj['id']}}" class="text-secondary">
<div style="line-height:0.9;">{{ dict_obj['id'] }}</div>
</a>
<div class="mb-2">
{% for tag in dict_obj['tags'] %}
<a href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type={{dict_tagged['object_type']}}&ltags={{ tag }}">
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
</a>
{% endfor %}
</div>
</td>
</tr>
{% endfor %}
{%elif dict_tagged["object_type"]=="message"%} {%elif dict_tagged["object_type"]=="message"%}
{% for dict_obj in dict_tagged["tagged_obj"] %} {% for dict_obj in dict_tagged["tagged_obj"] %}
<tr> <tr>