diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py
index 9d1514e8..7ddcc5e6 100755
--- a/bin/crawlers/Crawler.py
+++ b/bin/crawlers/Crawler.py
@@ -192,6 +192,7 @@ class Crawler(AbstractModule):
             # force=force,
             # general_timeout_in_sec=120)
 
+            # with_favicon = True,
             capture_uuid = self.lacus.enqueue(url=url,
                                               depth=task.get_depth(),
                                               user_agent=task.get_user_agent(),
@@ -274,8 +275,9 @@ class Crawler(AbstractModule):
             for tag in task.get_tags():
                 self.domain.add_tag(tag)
             self.original_domain.add_history(epoch, root_item=self.root_item)
-            crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
+            # crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
 
+            self.domain.update_vanity_cluster()
             crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
             print('capture:', capture.uuid, 'completed')
             print('task: ', task.uuid, 'completed')
diff --git a/bin/lib/objects/Domains.py b/bin/lib/objects/Domains.py
index 3895aa54..f4c800e4 100755
--- a/bin/lib/objects/Domains.py
+++ b/bin/lib/objects/Domains.py
@@ -411,6 +411,11 @@ class Domain(AbstractObject):
         r_crawler.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
         r_crawler.sadd(f'domain:language:{self.id}', language)
 
+    def update_vanity_cluster(self):
+        """Index this domain in the vanity clusters (onion domains only)."""
+        if self.get_domain_type() == 'onion':
+            update_vanity_cluster(self.id)
+
     ############################################################################
     ############################################################################
 
@@ -644,10 +649,86 @@ def api_search_domains_by_name(name_to_search, domain_types, meta=False, page=1)
 ################################################################################
 ################################################################################
 
+#### Vanity Explorer ####
+
+# TODO ADD ME IN OBJ CLASS
+def get_domain_vanity(domain, len_vanity=4):
+    """Return the vanity of a domain: its first ``len_vanity`` characters."""
+    return domain[:len_vanity]
+
+def get_vanity_clusters(nb_min=4):
+    """Return the 4-char vanity clusters holding at least ``nb_min`` domains.
+
+    Reads the 'vanity:onion:4' sorted set by score (from ``nb_min`` to +inf)
+    and returns it with the scores, i.e. the number of domains per vanity.
+    """
+    return r_crawler.zrange('vanity:onion:4', nb_min, '+inf', byscore=True, withscores=True)
+
+def get_vanity_domains(vanity, len_vanity=4, meta=False):
+    """Return the domains whose ``len_vanity``-char vanity is ``vanity``.
+
+    Only the 4-char vanities are indexed: a longer vanity is resolved by
+    filtering the 4-char set it belongs to. With ``meta=True`` the result of
+    Domain.get_meta() is returned for each domain instead of its id.
+    """
+    if len_vanity == 4:
+        domains = r_crawler.smembers(f'vanity:{int(len_vanity)}:{vanity}')
+    else:
+        domains = [domain for domain in r_crawler.smembers(f'vanity:4:{vanity[:4]}')
+                   if get_domain_vanity(domain, len_vanity=len_vanity) == vanity]
+    if meta:
+        return [Domain(domain).get_meta(options={'languages', 'screenshot', 'tags_safe'})
+                for domain in domains]
+    return domains
+
+def get_vanity_cluster(vanity, len_vanity=4, nb_min=4):
+    """Return the ``len_vanity``-char clusters found under ``vanity``.
+
+    For ``len_vanity == 4`` this is get_vanity_clusters(). Otherwise the domains
+    sharing the 4-char vanity are grouped by their ``len_vanity``-char vanity and
+    a dict {vanity: nb domains} is returned, limited to the vanities on the same
+    branch as ``vanity`` that hold at least ``nb_min`` domains.
+    """
+    if len_vanity == 4:
+        return get_vanity_clusters(nb_min=nb_min)
+    nb_min = int(nb_min)  # may be a raw query-string value
+    prefix = vanity[:len_vanity]
+    clusters = {}
+    for domain in get_vanity_domains(vanity[:4], len_vanity=4):
+        # the 4-char set also holds the domains of the sibling branches
+        if not domain.startswith(prefix):
+            continue
+        new_vanity = get_domain_vanity(domain, len_vanity=len_vanity)
+        # key the counter on new_vanity: testing ``vanity`` reset it to 0 on every domain
+        clusters[new_vanity] = clusters.get(new_vanity, 0) + 1
+    return {new_vanity: nb for new_vanity, nb in clusters.items() if nb >= nb_min}
+
+def get_vanity_nb_domains(vanity, len_vanity=4):
+    """Return the number of members of the 'vanity:<len_vanity>:<vanity>' set."""
+    return r_crawler.scard(f'vanity:{int(len_vanity)}:{vanity}')
+
+# TODO BUILD DICTIONARY
+def update_vanity_cluster(domain):
+    """Add a domain to its 4-char vanity set and count it in 'vanity:onion:4'."""
+    vanity = get_domain_vanity(domain, len_vanity=4)
+    add = r_crawler.sadd(f'vanity:4:{vanity}', domain)
+    if add == 1:
+        # the score is the cluster size filtered by get_vanity_clusters():
+        # increment it per new member (zadd {vanity: 1} pinned it to 1)
+        r_crawler.zincrby('vanity:onion:4', 1, vanity)
+
+def _rebuild_vanity_clusters():
+    """Delete every vanity key, then re-index get_domains_up_by_type('onion')."""
+    for vanity in r_crawler.zrange('vanity:onion:4', 0, -1):
+        r_crawler.delete(f'vanity:4:{vanity}')
+    r_crawler.delete('vanity:onion:4')
+    for domain in get_domains_up_by_type('onion'):
+        update_vanity_cluster(domain)
+
 def cluster_onion_domain_vanity(len_vanity=4):
     domains = {}
     occurrences = {}
-    for domain in get_domains_up_by_type('web'):
+    for domain in get_domains_up_by_type('onion'):
         start = domain[:len_vanity]
         if start not in domains:
             domains[start] = []
@@ -659,8 +740,6 @@ def cluster_onion_domain_vanity(len_vanity=4):
     res = dict(sorted(occurrences.items(), key=lambda item: item[1], reverse=True))
     print(json.dumps(res))
 
-################################################################################
-################################################################################
 
 if __name__ == '__main__':
-    cluster_onion_domain_vanity(len_vanity=4)
+    _rebuild_vanity_clusters()
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index c8983ace..31a1e647 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -576,6 +576,40 @@ def domains_search_date_post():
                             type=domain_type, down=down, up=up))
 
 
+@crawler_splash.route('/domains/explorer/vanity', methods=['GET'])
+@login_required
+@login_analyst
+def domains_explorer_vanity_clusters():
+    """Render the 4-char vanity clusters holding at least ``min`` domains."""
+    # type=int: a non-numeric value falls back to the default instead of raising
+    nb_min = request.args.get('min', 0, type=int)
+    if nb_min < 0:
+        nb_min = 4
+    vanity_clusters = Domains.get_vanity_clusters(nb_min=nb_min)
+    return render_template("explorer_vanity_clusters.html", vanity_clusters=vanity_clusters,
+                           length=4)
+
+@crawler_splash.route('/domains/explorer/vanity/explore', methods=['GET'])
+@login_required
+@login_analyst
+def domains_explorer_vanity_explore():
+    """Render the domains of ``vanity`` and its one-char-longer clusters."""
+    vanity = request.args.get('vanity', '')
+    nb_min = request.args.get('min', 0, type=int)  # TODO SHOW DOMAINS OPTIONS + HARD CODED DOMAINS LIMIT FOR RENDER
+    length = len(vanity)
+    if length < 4:
+        # missing or too short: vanities are indexed from 4 chars, show the overview
+        return domains_explorer_vanity_clusters()
+    if nb_min < 0:
+        nb_min = 4
+    vanity_clusters = Domains.get_vanity_cluster(vanity, len_vanity=length+1, nb_min=nb_min)
+    vanity_domains = Domains.get_vanity_domains(vanity, len_vanity=length, meta=True)
+    # breadcrumb: every prefix from 4 chars up to the explored vanity itself
+    vanities_tree = [vanity[:i] for i in range(4, length + 1)]
+    return render_template("explorer_vanity_domains.html", vanity_clusters=vanity_clusters,
+                           bootstrap_label=bootstrap_label, vanity=vanity, vanities_tree=vanities_tree,
+                           vanity_domains=vanity_domains, length=length)
+
 ##-- --##
 
 
diff --git a/var/www/blueprints/objects_item.py b/var/www/blueprints/objects_item.py
index e3ae8d18..29e6a2de 100644
--- a/var/www/blueprints/objects_item.py
+++ b/var/www/blueprints/objects_item.py
@@ -56,7 +56,7 @@ def screenshot(filename):
         abort(404)
     filename = filename.replace('/', '')
     s = Screenshot(filename)
-    return send_from_directory(SCREENSHOT_FOLDER, s.get_rel_path(add_extension=True), as_attachment=True)
+    return send_from_directory(SCREENSHOT_FOLDER, s.get_rel_path(add_extension=True), as_attachment=False, mimetype='image')
 
 @objects_item.route("/object/item")
 @login_required
diff --git a/var/www/templates/chats_explorer/block_message.html b/var/www/templates/chats_explorer/block_message.html
index 3343210a..811a642e 100644
--- a/var/www/templates/chats_explorer/block_message.html
+++ b/var/www/templates/chats_explorer/block_message.html
@@ -16,7 +16,7 @@
         height: 2px;
         background: #eee;
     }
-    .message_image {
+    .object_image {
         max-width: 50%;
         filter: blur(5px);
     }
@@ -66,7 +66,7 @@
         {% endif %}
         {% if message['images'] %}
             {% for message_image in message['images'] %}
-
+
             {% endfor %}
         {% endif %}
         {% if message['files-names'] %}
diff --git a/var/www/templates/crawler/crawler_splash/domain_explorer.html b/var/www/templates/crawler/crawler_splash/domain_explorer.html
index b3ed158f..e0e59275 100644
--- a/var/www/templates/crawler/crawler_splash/domain_explorer.html
+++ b/var/www/templates/crawler/crawler_splash/domain_explorer.html
@@ -41,26 +41,8 @@
-
-
-
-
- -
-
- -
-
- -
-
-
+
+ {% include 'objects/image/block_blur_img_slider.html' %}
@@ -144,85 +126,4 @@ function toggle_sidebar(){ } - - - diff --git a/var/www/templates/crawler/crawler_splash/explorer_vanity_clusters.html b/var/www/templates/crawler/crawler_splash/explorer_vanity_clusters.html new file mode 100644 index 00000000..fa7b3062 --- /dev/null +++ b/var/www/templates/crawler/crawler_splash/explorer_vanity_clusters.html @@ -0,0 +1,94 @@ + + + + Vanity Explorer - AIL + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'crawler/menu_sidebar.html' %} + +
+ +

Vanity Explorer

+ + + + + + + + + + + {% for row in vanity_clusters %} + + + + + + {% endfor %} + +
VanityNB Domains
+ {{ row[0] }} + {{ row[1] | int }} + +
+ + +
+
+
+ + + + + + diff --git a/var/www/templates/crawler/crawler_splash/explorer_vanity_domains.html b/var/www/templates/crawler/crawler_splash/explorer_vanity_domains.html new file mode 100644 index 00000000..5a251a78 --- /dev/null +++ b/var/www/templates/crawler/crawler_splash/explorer_vanity_domains.html @@ -0,0 +1,137 @@ + + + + Vanity Explorer - AIL + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'crawler/menu_sidebar.html' %} + +
+ +

Vanity Explorer:

+ +
+
+ +
+ {% if vanities_tree | length > 1 %} + + {% for vanity in vanities_tree[:-1] %} + {{ vanity }} + {% endfor %} + {{ vanities_tree[-1] }} + {% else %} + + {% for vanity in vanities_tree %} + {{ vanity }} + {% endfor %} + {% endif %} +
+
Vanity Length: {{ length }}
+ +
+
+ + + + + + + + + + + {% for row in vanity_clusters %} + + + + + + {% endfor %} + +
Length+1 VanitiesNB Domains
+ {{ row }} + {{ vanity_clusters[row] }} + +
+ +
+
+ + + {% if vanity_domains %} +
+ {% include 'objects/image/block_blur_img_slider.html' %} +
+ +
+
+

{{ vanity }} {{ vanity_domains | length }}

+
+
+ {% with l_dict_domains=vanity_domains, bootstrap_label=bootstrap_label %} + {% include 'domains/card_img_domain.html' %} + {% endwith %} +
+
+ + {% endif %} + +
+
+
+ + + + + + diff --git a/var/www/templates/crawler/menu_sidebar.html b/var/www/templates/crawler/menu_sidebar.html index 0328f868..819e319a 100644 --- a/var/www/templates/crawler/menu_sidebar.html +++ b/var/www/templates/crawler/menu_sidebar.html @@ -62,6 +62,12 @@ Web Domain +
- -
@@ -54,7 +54,7 @@ }); function searchLanguages() { - var all_domain_types = ['onion', 'regular'] // TODO: load from flask + var all_domain_types = ['onion', 'web'] // TODO: load from flask var l_domains_types = []; var data = llanguages.getValue(); diff --git a/var/www/templates/domains/card_img_domain.html b/var/www/templates/domains/card_img_domain.html index eff8825d..f0afaef6 100644 --- a/var/www/templates/domains/card_img_domain.html +++ b/var/www/templates/domains/card_img_domain.html @@ -1,3 +1,11 @@ + + {% for dict_domain in l_dict_domains %} {% if loop.index0 % 4 == 0 %}
@@ -6,7 +14,7 @@
{% if dict_domain["is_tags_safe"] %} - + {% else %} diff --git a/var/www/templates/domains/domains_filter_languages.html b/var/www/templates/domains/domains_filter_languages.html index 8f8ba747..efbff0db 100644 --- a/var/www/templates/domains/domains_filter_languages.html +++ b/var/www/templates/domains/domains_filter_languages.html @@ -38,27 +38,9 @@
-
-
-
-
- -
-
- -
-
- -
-
-
-
+
+ {% include 'objects/image/block_blur_img_slider.html' %} +
@@ -109,84 +91,4 @@ function toggle_sidebar(){ } - - - diff --git a/var/www/templates/domains/domains_result_list.html b/var/www/templates/domains/domains_result_list.html index cbcdb62c..cd9da59a 100644 --- a/var/www/templates/domains/domains_result_list.html +++ b/var/www/templates/domains/domains_result_list.html @@ -37,26 +37,8 @@ {% endwith %}
-
-
-
-
- -
-
- -
-
- -
-
-
+
+ {% include 'objects/image/block_blur_img_slider.html' %}
@@ -104,84 +86,4 @@ function toggle_sidebar(){ } - - diff --git a/var/www/templates/objects/image/block_blur_img_slider.html b/var/www/templates/objects/image/block_blur_img_slider.html index 1341f877..c13372f7 100644 --- a/var/www/templates/objects/image/block_blur_img_slider.html +++ b/var/www/templates/objects/image/block_blur_img_slider.html @@ -26,7 +26,7 @@ function blur_images(){ let blurValue = blur_slider.val(); blurValue = 15 - blurValue; - let images = document.getElementsByClassName('message_image'); + let images = document.getElementsByClassName('object_image'); for(i = 0; i < images.length; i++) { images[i].style.filter = "blur(" + blurValue + "px)"; }