mirror of https://github.com/CIRCL/AIL-framework
chg: [Languages]detect + search domains languages
parent
28f6963ff4
commit
6bc54baf74
|
@ -216,6 +216,8 @@ function launching_scripts {
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script_AIL" -X screen -t "Tags" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Tags.py; read x"
|
screen -S "Script_AIL" -X screen -t "Tags" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Tags.py; read x"
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
|
screen -S "Script_AIL" -X screen -t "Languages" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Languages.py; read x"
|
||||||
|
sleep 0.1
|
||||||
screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x"
|
screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x"
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DbCleaner.py; read x"
|
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DbCleaner.py; read x"
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import cld3
|
||||||
|
|
||||||
|
from packages import Item
|
||||||
|
from lib import Domain
|
||||||
|
|
||||||
|
from pubsublogger import publisher
|
||||||
|
from Helper import Process
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # `time` is required by the idle back-off below; it is not among this
    # script's module-level imports, so bring it into scope here.
    import time

    publisher.port = 6380
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Languages'
    # Setup the I/O queues
    p = Process(config_section)

    while True:
        message = p.get_from_set()
        if message is None:
            # Nothing queued: log, wait one second and poll again.
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        item_id = Item.get_item_id(message)
        # Languages are only recorded for crawled items, against their domain.
        if Item.is_crawled(item_id):
            domain = Item.get_item_domain(item_id)
            Domain.add_domain_languages_by_item_id(domain, item_id)
|
|
@ -9,6 +9,7 @@ The ``Domain``
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import itertools
|
||||||
import time
|
import time
|
||||||
import redis
|
import redis
|
||||||
import random
|
import random
|
||||||
|
@ -24,6 +25,7 @@ import Tag
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||||
import ConfigLoader
|
import ConfigLoader
|
||||||
import Correlate_object
|
import Correlate_object
|
||||||
|
import Language
|
||||||
import Screenshot
|
import Screenshot
|
||||||
import Username
|
import Username
|
||||||
|
|
||||||
|
@ -66,6 +68,15 @@ def sanitize_domain_type(domain_type):
|
||||||
else:
|
else:
|
||||||
return 'regular'
|
return 'regular'
|
||||||
|
|
||||||
|
def sanitize_domain_types(l_domain_type):
    """Validate a list of domain types.

    Returns *l_domain_type* unchanged when it is non-empty and every entry
    is one of the types returned by get_all_domains_type(); otherwise
    returns the full list of known types.
    """
    known_types = get_all_domains_type()
    if l_domain_type and all(d_type in known_types for d_type in l_domain_type):
        return l_domain_type
    return known_types
|
||||||
|
|
||||||
######## DOMAINS ########
|
######## DOMAINS ########
|
||||||
def get_all_domains_type():
|
def get_all_domains_type():
|
||||||
return ['onion', 'regular']
|
return ['onion', 'regular']
|
||||||
|
@ -210,6 +221,15 @@ def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[],
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## TODO: filters:
|
||||||
|
# - tags
|
||||||
|
# - languages
|
||||||
|
# - daterange UP
|
||||||
|
def get_domains_by_filters():
    """Placeholder for combined domain filtering; not implemented yet."""
    return None
|
||||||
|
|
||||||
def create_domains_metadata_list(list_domains, domain_type):
|
def create_domains_metadata_list(list_domains, domain_type):
|
||||||
l_domains = []
|
l_domains = []
|
||||||
for domain in list_domains:
|
for domain in list_domains:
|
||||||
|
@ -218,9 +238,98 @@ def create_domains_metadata_list(list_domains, domain_type):
|
||||||
else:
|
else:
|
||||||
dom_type = domain_type
|
dom_type = domain_type
|
||||||
l_domains.append(get_domain_metadata(domain, dom_type, first_seen=True, last_ckeck=True, status=True,
|
l_domains.append(get_domain_metadata(domain, dom_type, first_seen=True, last_ckeck=True, status=True,
|
||||||
ports=True, tags=True, screenshot=True, tags_safe=True))
|
ports=True, tags=True, languages=True, screenshot=True, tags_safe=True))
|
||||||
return l_domains
|
return l_domains
|
||||||
|
|
||||||
|
|
||||||
|
######## LANGUAGES ########
|
||||||
|
def get_all_domains_languages():
    """Return the members of the 'all_domains_languages' set."""
    all_languages = r_serv_onion.smembers('all_domains_languages')
    return all_languages
|
||||||
|
|
||||||
|
def get_domains_by_languages(languages, l_domain_type=None):
    """Return the domains recorded for every language in *languages*.

    :param languages: list of language codes, used verbatim as the last
        component of the 'language:domains:<type>:<language>' keys
    :param l_domain_type: domain types to search; passed through
        sanitize_domain_types(), so an empty/None value or any unknown type
        selects all known domain types
    :return: an empty list when no language is given; the result of
        get_all_domains_by_language() for a single language; otherwise a
        list chaining, for each domain type, the intersection of the
        per-language domain sets
    """
    l_domain_type = sanitize_domain_types(l_domain_type)
    if not languages:
        return []
    if len(languages) == 1:
        return get_all_domains_by_language(languages[0], l_domain_type=l_domain_type)

    all_domains_t = []
    for domain_type in l_domain_type:
        l_keys_name = ['language:domains:{}:{}'.format(domain_type, language)
                       for language in languages]
        # Domains of this type that appear in the set of *each* language.
        res = r_serv_onion.sinter(l_keys_name[0], *l_keys_name[1:])
        if res:
            all_domains_t.append(res)
    return list(itertools.chain.from_iterable(all_domains_t))
|
||||||
|
|
||||||
|
def get_all_domains_by_language(language, l_domain_type=None):
    """Return the domains recorded for *language*.

    :param language: language code, used verbatim in the
        'language:domains:<type>:<language>' key
    :param l_domain_type: domain types to search; passed through
        sanitize_domain_types(), so an empty/None value or any unknown type
        selects all known domain types
    :return: the members of the single matching set, or the union of the
        per-type sets when several domain types are selected
    """
    l_domain_type = sanitize_domain_types(l_domain_type)
    if len(l_domain_type) == 1:
        return r_serv_onion.smembers('language:domains:{}:{}'.format(l_domain_type[0], language))

    l_keys_name = ['language:domains:{}:{}'.format(domain_type, language)
                   for domain_type in l_domain_type]
    return r_serv_onion.sunion(l_keys_name[0], *l_keys_name[1:])
|
||||||
|
|
||||||
|
def get_domain_languages(domain, r_list=False):
    """Return the members of the 'domain:language:<domain>' set.

    :param r_list: when true, the result is converted to a list
    """
    languages = r_serv_onion.smembers('domain:language:{}'.format(domain))
    return list(languages) if r_list else languages
|
||||||
|
|
||||||
|
def add_domain_language(domain, language):
    """Record *language* for *domain* in the language index sets.

    Only the part of *language* before the first '-' is stored.
    """
    base_language = language.split('-')[0]
    d_type = get_domain_type(domain)

    # Known languages: global set, then the set for this domain type.
    r_serv_onion.sadd('all_domains_languages', base_language)
    r_serv_onion.sadd('all_domains_languages:{}'.format(d_type), base_language)
    # language -> domains, then domain -> languages.
    r_serv_onion.sadd('language:domains:{}:{}'.format(d_type, base_language), domain)
    r_serv_onion.sadd('domain:language:{}'.format(domain), base_language)
|
||||||
|
|
||||||
|
def add_domain_languages_by_item_id(domain, item_id):
    """Add to *domain* every language returned by Item.get_item_languages()
    for *item_id* (min_proportion=0.2, min_probability=0.8)."""
    detected = Item.get_item_languages(item_id, min_proportion=0.2, min_probability=0.8)
    for detected_lang in detected:
        add_domain_language(domain, detected_lang.language)
|
||||||
|
|
||||||
|
def delete_domain_languages(domain):
    """Remove every language recorded for *domain* from the index sets.

    For each language of the domain:
      * the domain is removed from 'language:domains:<type>:<language>';
      * if that key no longer exists, the language is removed from
        'all_domains_languages:<type>';
      * if no domain type lists the language any more, it is also removed
        from the global 'all_domains_languages' set.
    Finally the 'domain:language:<domain>' key is deleted.
    """
    domain_type = get_domain_type(domain)
    for language in get_domain_languages(domain):
        language_key = 'language:domains:{}:{}'.format(domain_type, language)
        r_serv_onion.srem(language_key, domain)
        if not r_serv_onion.exists(language_key):
            r_serv_onion.srem('all_domains_languages:{}'.format(domain_type), language)
            # Use a separate loop variable so the domain's own type is not
            # overwritten for the following languages; any() stops at the
            # first domain type that still lists the language.
            still_listed = any(
                r_serv_onion.sismember('all_domains_languages:{}'.format(other_type), language)
                for other_type in get_all_domains_type()
            )
            if not still_listed:
                r_serv_onion.srem('all_domains_languages', language)
    r_serv_onion.delete('domain:language:{}'.format(domain))
|
||||||
|
|
||||||
|
def _delete_all_domains_languages():
    """Call delete_domain_languages() on every domain of every known language."""
    for known_language in get_all_domains_languages():
        domains = get_all_domains_by_language(known_language)
        for domain_name in domains:
            delete_domain_languages(domain_name)
|
||||||
|
|
||||||
|
## API ##
|
||||||
|
## TODO: verify domains type + languages list
|
||||||
|
## TODO: add pagination
|
||||||
|
def api_get_domains_by_languages(domains_types, languages, domains_metadata=False, page=1):
    """Return one page (28 entries) of the sorted domains matching *languages*.

    :param domains_types: domain types forwarded to get_domains_by_languages()
    :param languages: language codes forwarded to get_domains_by_languages()
    :param domains_metadata: when true, each entry of 'list_elem' is replaced
        by the dict returned by get_domain_metadata()
    :param page: page number forwarded to paginate_iterator()
    :return: the object returned by paginate_iterator()
    """
    sorted_domains = sorted(get_domains_by_languages(languages, l_domain_type=domains_types))
    paginated = paginate_iterator(sorted_domains, nb_obj=28, page=page)
    if domains_metadata:
        paginated['list_elem'] = [
            get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True,
                                status=True, ports=True, tags=True, tags_safe=True,
                                languages=True, screenshot=True)
            for domain in paginated['list_elem']
        ]
    return paginated
|
||||||
|
####---- ----####
|
||||||
|
|
||||||
######## DOMAIN ########
|
######## DOMAIN ########
|
||||||
|
|
||||||
def get_domain_type(domain):
|
def get_domain_type(domain):
|
||||||
|
@ -498,7 +607,7 @@ def get_domain_random_screenshot(domain):
|
||||||
'''
|
'''
|
||||||
return Screenshot.get_randon_domain_screenshot(domain)
|
return Screenshot.get_randon_domain_screenshot(domain)
|
||||||
|
|
||||||
def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, tags_safe=False, screenshot=False):
|
def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, tags_safe=False, languages=False, screenshot=False):
|
||||||
'''
|
'''
|
||||||
Get Domain basic metadata
|
Get Domain basic metadata
|
||||||
|
|
||||||
|
@ -516,6 +625,7 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
|
||||||
'''
|
'''
|
||||||
dict_metadata = {}
|
dict_metadata = {}
|
||||||
dict_metadata['id'] = domain
|
dict_metadata['id'] = domain
|
||||||
|
dict_metadata['type'] = domain_type
|
||||||
if first_seen:
|
if first_seen:
|
||||||
res = get_domain_first_seen(domain, domain_type=domain_type)
|
res = get_domain_first_seen(domain, domain_type=domain_type)
|
||||||
if res is not None:
|
if res is not None:
|
||||||
|
@ -535,6 +645,8 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
|
||||||
dict_metadata['is_tags_safe'] = Tag.is_tags_safe(dict_metadata['tags'])
|
dict_metadata['is_tags_safe'] = Tag.is_tags_safe(dict_metadata['tags'])
|
||||||
else:
|
else:
|
||||||
dict_metadata['is_tags_safe'] = Tag.is_tags_safe(get_domain_tags(domain))
|
dict_metadata['is_tags_safe'] = Tag.is_tags_safe(get_domain_tags(domain))
|
||||||
|
if languages:
|
||||||
|
dict_metadata['languages'] = Language.get_languages_from_iso(get_domain_languages(domain, r_list=True), sort=True)
|
||||||
if screenshot:
|
if screenshot:
|
||||||
dict_metadata['screenshot'] = get_domain_random_screenshot(domain)
|
dict_metadata['screenshot'] = get_domain_random_screenshot(domain)
|
||||||
return dict_metadata
|
return dict_metadata
|
||||||
|
@ -796,6 +908,14 @@ class Domain(object):
|
||||||
'''
|
'''
|
||||||
return get_domain_tags(self.domain)
|
return get_domain_tags(self.domain)
|
||||||
|
|
||||||
|
def get_domain_languages(self):
    '''
    Return all languages of this domain.

    :return: result of the module-level get_domain_languages() for self.domain
    '''
    languages = get_domain_languages(self.domain)
    return languages
|
||||||
|
|
||||||
def get_domain_correlation(self):
|
def get_domain_correlation(self):
|
||||||
'''
|
'''
|
||||||
Retun all correlation of a given domain.
|
Retun all correlation of a given domain.
|
||||||
|
|
|
@ -0,0 +1,240 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import redis
|
||||||
|
|
||||||
|
# Language code (as used by the *_from_iso helpers) -> English language name.
dict_iso_languages = {
    'af': 'Afrikaans', 'am': 'Amharic', 'ar': 'Arabic', 'bg': 'Bulgarian',
    'bn': 'Bangla', 'bs': 'Bosnian', 'ca': 'Catalan', 'ceb': 'Cebuano',
    'co': 'Corsican', 'cs': 'Czech', 'cy': 'Welsh', 'da': 'Danish',
    'de': 'German', 'el': 'Greek', 'en': 'English', 'eo': 'Esperanto',
    'es': 'Spanish', 'et': 'Estonian', 'eu': 'Basque', 'fa': 'Persian',
    'fi': 'Finnish', 'fil': 'Filipino', 'fr': 'French', 'fy': 'Western Frisian',
    'ga': 'Irish', 'gd': 'Scottish Gaelic', 'gl': 'Galician', 'gu': 'Gujarati',
    'ha': 'Hausa', 'haw': 'Hawaiian', 'hi': 'Hindi', 'hmn': 'Hmong',
    'hr': 'Croatian', 'ht': 'Haitian Creole', 'hu': 'Hungarian', 'hy': 'Armenian',
    'id': 'Indonesian', 'ig': 'Igbo', 'is': 'Icelandic', 'it': 'Italian',
    'iw': 'Hebrew', 'ja': 'Japanese', 'jv': 'Javanese', 'ka': 'Georgian',
    'kk': 'Kazakh', 'km': 'Khmer', 'kn': 'Kannada', 'ko': 'Korean',
    'ku': 'Kurdish', 'ky': 'Kyrgyz', 'la': 'Latin', 'lb': 'Luxembourgish',
    'lo': 'Lao', 'lt': 'Lithuanian', 'lv': 'Latvian', 'mg': 'Malagasy',
    'mi': 'Maori', 'mk': 'Macedonian', 'ml': 'Malayalam', 'mn': 'Mongolian',
    'mr': 'Marathi', 'ms': 'Malay', 'mt': 'Maltese', 'my': 'Burmese',
    'ne': 'Nepali', 'nl': 'Dutch', 'no': 'Norwegian', 'ny': 'Nyanja',
    'pa': 'Punjabi', 'pl': 'Polish', 'ps': 'Pashto', 'pt': 'Portuguese',
    'ro': 'Romanian', 'ru': 'Russian', 'sd': 'Sindhi', 'si': 'Sinhala',
    'sk': 'Slovak', 'sl': 'Slovenian', 'sm': 'Samoan', 'sn': 'Shona',
    'so': 'Somali', 'sq': 'Albanian', 'sr': 'Serbian', 'st': 'Southern Sotho',
    'su': 'Sundanese', 'sv': 'Swedish', 'sw': 'Swahili', 'ta': 'Tamil',
    'te': 'Telugu', 'tg': 'Tajik', 'th': 'Thai', 'tr': 'Turkish',
    'uk': 'Ukrainian', 'ur': 'Urdu', 'uz': 'Uzbek', 'vi': 'Vietnamese',
    'xh': 'Xhosa', 'yi': 'Yiddish', 'yo': 'Yoruba', 'zh': 'Chinese',
    'zu': 'Zulu',
}
|
||||||
|
|
||||||
|
# English language name -> language code.
# Built by inverting dict_iso_languages so the two tables cannot drift apart;
# every language name there is unique, so the inversion loses no entry.
dict_languages_iso = {
    language_name: iso_code
    for iso_code, language_name in dict_iso_languages.items()
}
|
||||||
|
|
||||||
|
def get_language_from_iso(iso_language):
    """Return the language name for *iso_language*, or None if unknown."""
    language_name = dict_iso_languages.get(iso_language)
    return language_name
|
||||||
|
|
||||||
|
def get_languages_from_iso(l_iso_languages, sort=False):
    """Translate language codes to language names, dropping unknown codes.

    :param l_iso_languages: iterable of language codes
    :param sort: when true, the names are returned sorted
    :return: list of language names
    """
    names = [name for name in map(get_language_from_iso, l_iso_languages) if name]
    return sorted(names) if sort else names
|
||||||
|
|
||||||
|
def get_iso_from_language(language):
    """Return the language code for the name *language*, or None if unknown."""
    iso_code = dict_languages_iso.get(language)
    return iso_code
|
||||||
|
|
||||||
|
def get_iso_from_languages(l_languages, sort=False):
    """Translate language names to language codes, dropping unknown names.

    :param l_languages: iterable of language names
    :param sort: when true, the codes are returned sorted
    :return: list of language codes
    """
    codes = [code for code in map(get_iso_from_language, l_languages) if code]
    return sorted(codes) if sort else codes
|
|
@ -2,8 +2,10 @@
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import redis
|
import redis
|
||||||
|
import cld3
|
||||||
import html2text
|
import html2text
|
||||||
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
@ -101,13 +103,62 @@ def add_item_parent(item_parent, item_id):
|
||||||
def get_item_content(item_id):
|
def get_item_content(item_id):
|
||||||
return item_basic.get_item_content(item_id)
|
return item_basic.get_item_content(item_id)
|
||||||
|
|
||||||
def get_item_content_html2text(item_id, item_content=None):
|
def get_item_content_html2text(item_id, item_content=None, ignore_links=False):
|
||||||
if not item_content:
|
if not item_content:
|
||||||
item_content = get_item_content(item_id)
|
item_content = get_item_content(item_id)
|
||||||
h = html2text.HTML2Text()
|
h = html2text.HTML2Text()
|
||||||
h.ignore_links = False
|
h.ignore_links = ignore_links
|
||||||
|
h.ignore_images = ignore_links
|
||||||
return h.handle(item_content)
|
return h.handle(item_content)
|
||||||
|
|
||||||
|
def remove_all_urls_from_content(item_id, item_content=None):
    """Strip URL-like strings and PGP armored blocks from an item's content.

    :param item_id: item whose content is loaded with get_item_content()
        when *item_content* is empty or None
    :param item_content: text to clean; takes precedence over *item_id*
    :return: the text with every URL match and every PGP public key block,
        signature and message removed
    """
    if not item_content:
        item_content = get_item_content(item_id)

    regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b'
    url_regex = re.compile(regex)
    # Remove the longest matches first so a short URL that is a prefix of a
    # longer one does not leave a dangling tail behind.
    urls = sorted(url_regex.findall(item_content), key=len, reverse=True)
    for url in urls:
        item_content = item_content.replace(url, '')

    regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
    regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
    regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'

    # Same find-then-replace pass for each kind of PGP block, in this order.
    for pgp_regex in (regex_pgp_public_blocs, regex_pgp_signature, regex_pgp_message):
        for pgp_block in re.findall(pgp_regex, item_content):
            item_content = item_content.replace(pgp_block, '')

    return item_content
|
||||||
|
|
||||||
|
def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
    """Detect the languages of an item's text with cld3.

    The content is converted with get_item_content_html2text(ignore_links=True),
    cleaned with remove_all_urls_from_content(), and its whitespace runs are
    collapsed to single spaces before detection.

    :param min_len: minimum length of the cleaned text; shorter texts yield []
    :param num_langs: forwarded to cld3.get_frequent_languages()
    :param min_proportion: minimum `proportion` of a kept prediction
    :param min_probability: minimum `probability` of a kept prediction
    :return: list of the cld3 predictions that pass both thresholds and are
        flagged `is_reliable`
    """
    text = get_item_content_html2text(item_id, ignore_links=True)
    text = remove_all_urls_from_content(item_id, item_content=text)
    text = ' '.join(text.split())

    if len(text) < min_len:
        return []
    return [
        prediction
        for prediction in cld3.get_frequent_languages(text, num_langs=num_langs)
        if prediction.proportion >= min_proportion
        and prediction.probability >= min_probability
        and prediction.is_reliable
    ]
|
||||||
|
|
||||||
# API
|
# API
|
||||||
def get_item(request_dict):
|
def get_item(request_dict):
|
||||||
if not request_dict:
|
if not request_dict:
|
||||||
|
@ -496,3 +547,17 @@ def delete_domain_node(item_id):
|
||||||
domain_basic.delete_domain_item_core(item_id, domain, port)
|
domain_basic.delete_domain_item_core(item_id, domain, port)
|
||||||
for child_id in get_all_domain_node_by_item_id(item_id):
|
for child_id in get_all_domain_node_by_item_id(item_id):
|
||||||
delete_item(child_id)
|
delete_item(child_id)
|
||||||
|
|
||||||
|
# if __name__ == '__main__':
|
||||||
|
# import Domain
|
||||||
|
# domain = Domain.Domain('domain.onion')
|
||||||
|
# for domain_history in domain.get_domain_history():
|
||||||
|
# domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag
|
||||||
|
# if "items" in domain_item:
|
||||||
|
# for item_dict in domain_item['items']:
|
||||||
|
# item_id = item_dict['id']
|
||||||
|
# print(item_id)
|
||||||
|
# for lang in get_item_languages(item_id, min_proportion=0.2, min_probability=0.8):
|
||||||
|
# print(lang)
|
||||||
|
# print()
|
||||||
|
# print(get_item_languages(item_id, min_proportion=0.2, min_probability=0.6)) # 0.7 ?
|
||||||
|
|
|
@ -46,6 +46,9 @@ publish = Redis_Tags
|
||||||
subscribe = Redis_Global
|
subscribe = Redis_Global
|
||||||
publish = Redis_Tags
|
publish = Redis_Tags
|
||||||
|
|
||||||
|
[Languages]
|
||||||
|
subscribe = Redis_Global
|
||||||
|
|
||||||
[Categ]
|
[Categ]
|
||||||
subscribe = Redis_Global
|
subscribe = Redis_Global
|
||||||
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey
|
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey
|
||||||
|
|
|
@ -17,6 +17,25 @@ import subprocess
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||||
import ConfigLoader
|
import ConfigLoader
|
||||||
|
|
||||||
|
def launch_background_upgrade(version, script_name):
    """Run one background update script if *version* is still pending.

    Does nothing unless *version* is a member of 'ail:to_update'. Otherwise
    the progress keys are set, `update/<version>/<script_name>` under
    AIL_HOME is run with `python`, and, when the script reports a progress
    of 100 in 'ail:current_background_script_stat', the progress keys are
    cleared and *version* is removed from 'ail:to_update'.

    :param version: update version, e.g. 'v2.7'
    :param script_name: file name of the update script to run
    """
    if not r_serv.sismember('ail:to_update', version):
        return

    r_serv.delete('ail:update_error')
    r_serv.set('ail:update_in_progress', version)
    r_serv.set('ail:current_background_update', version)
    # NOTE(review): this label is the same for every script - confirm intended.
    r_serv.set('ail:current_background_script', 'domain tags update')

    update_file = os.path.join(os.environ['AIL_HOME'], 'update', version, script_name)
    subprocess.run(['python', update_file])

    update_progress = r_serv.get('ail:current_background_script_stat')
    if update_progress and int(update_progress) == 100:
        r_serv.delete('ail:update_in_progress')
        r_serv.delete('ail:current_background_script')
        r_serv.delete('ail:current_background_script_stat')
        r_serv.delete('ail:current_background_update')
        # Remove the version handled by this call, not the caller's
        # unrelated `new_version` global.
        r_serv.srem('ail:to_update', version)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
config_loader = ConfigLoader.ConfigLoader()
|
config_loader = ConfigLoader.ConfigLoader()
|
||||||
|
@ -114,3 +133,8 @@ if __name__ == "__main__":
|
||||||
r_serv.delete('ail:current_background_script_stat')
|
r_serv.delete('ail:current_background_script_stat')
|
||||||
r_serv.delete('ail:current_background_update')
|
r_serv.delete('ail:current_background_update')
|
||||||
r_serv.srem('ail:to_update', new_version)
|
r_serv.srem('ail:to_update', new_version)
|
||||||
|
|
||||||
|
launch_background_upgrade('v2.6', 'Update_screenshots.py')
|
||||||
|
launch_background_upgrade('v2.7', 'Update_domain_tags.py')
|
||||||
|
|
||||||
|
launch_background_upgrade('v3.4', 'Update_domain.py')
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import redis
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||||
|
import ConfigLoader
|
||||||
|
|
||||||
|
new_version = 'v3.4'
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    # Flag this version as the update currently in progress
    for status_key in ('ail:update_in_progress', 'ail:current_background_update'):
        r_serv.set(status_key, new_version)

    # Work queue: union of the 'full_onion_up' and 'full_regular_up' sets,
    # plus the counters used to report progress
    r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up')
    nb_domains = r_serv_onion.scard('domain_update_v3.4')
    r_serv.set('update:nb_elem_to_convert', nb_domains)
    r_serv.set('update:nb_elem_converted',0)

    # Set current ail version
    r_serv.set('ail:version', new_version)

    # Record the date (YYYYMMDD) at which this version was installed
    update_date = datetime.datetime.now().strftime("%Y%m%d")
    r_serv.hset('ail:update_date', new_version, update_date)
|
|
@ -0,0 +1,54 @@
|
||||||
|
#!/bin/bash

# v3.4 update: restart the databases, install pycld3, run Update.py, then
# shut everything down again.

# Every AIL environment variable must be set before going further.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -ldbv &
wait
echo ""

# SUBMODULES #
git submodule update

# echo ""
# echo -e $GREEN"installing KVROCKS ..."$DEFAULT
# cd ${AIL_HOME}
# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
# pushd kvrocks/
# make -j4
# popd

echo -e $GREEN"Installing pycld3 ..."$DEFAULT
pip3 install pycld3

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.4/Update.py
wait
echo ""
echo ""


echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0
|
|
@ -0,0 +1,57 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import redis
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||||
|
import ConfigLoader
|
||||||
|
import Domain
|
||||||
|
|
||||||
|
def update_update_stats():
    """Print and store the update progress as an integer percentage.

    Reads 'update:nb_elem_converted', compares it with the module-level
    `nb_elem_to_update`, and writes the percentage to
    'ail:current_background_script_stat'.
    """
    converted = int(r_serv_db.get('update:nb_elem_converted'))
    percent_done = int((converted * 100) / nb_elem_to_update)
    print('{}/{} updated {}%'.format(converted, nb_elem_to_update, percent_done))
    r_serv_db.set('ail:current_background_script_stat', percent_done)
|
||||||
|
|
||||||
|
def update_domain_language(domain_obj, item_id):
    """Add the languages detected in *item_id* to the domain of *domain_obj*."""
    Domain.add_domain_languages_by_item_id(domain_obj.get_domain_name(), item_id)
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    # Total used for the progress percentage; falls back to 1 when the
    # counter is missing or zero so the division in update_update_stats()
    # never divides by zero.
    stored_total = r_serv_db.get('update:nb_elem_to_convert')
    nb_elem_to_update = int(stored_total) if stored_total else 1

    #Domain._delete_all_domains_languages()

    while True:
        domain = r_serv_onion.spop('domain_update_v3.4')
        if domain is None:
            # Work queue exhausted.
            sys.exit(0)

        print(domain)
        domain = Domain.Domain(domain)
        # Walk every crawl of the domain and feed each crawled item to the
        # language detection.
        for domain_history in domain.get_domain_history():
            domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag
            if "items" in domain_item:
                for item_dict in domain_item['items']:
                    update_domain_language(domain, item_dict['id'])

        r_serv_db.incr('update:nb_elem_converted')
        update_update_stats()
|
|
@ -26,6 +26,7 @@ import Tag
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||||
import Domain
|
import Domain
|
||||||
import crawlers
|
import crawlers
|
||||||
|
import Language
|
||||||
|
|
||||||
r_cache = Flask_config.r_cache
|
r_cache = Flask_config.r_cache
|
||||||
r_serv_db = Flask_config.r_serv_db
|
r_serv_db = Flask_config.r_serv_db
|
||||||
|
@ -85,6 +86,9 @@ def send_to_spider():
|
||||||
return create_json_response(res[0], res[1])
|
return create_json_response(res[0], res[1])
|
||||||
return redirect(url_for('crawler_splash.manual'))
|
return redirect(url_for('crawler_splash.manual'))
|
||||||
|
|
||||||
|
|
||||||
|
#### Domains ####
|
||||||
|
|
||||||
# add route : /crawlers/show_domain
|
# add route : /crawlers/show_domain
|
||||||
@crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST'])
|
@crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST'])
|
||||||
@login_required
|
@login_required
|
||||||
|
@ -111,6 +115,7 @@ def showDomain():
|
||||||
dict_domain = {**dict_domain, **domain.get_domain_correlation()}
|
dict_domain = {**dict_domain, **domain.get_domain_correlation()}
|
||||||
dict_domain['correlation_nb'] = Domain.get_domain_total_nb_correlation(dict_domain)
|
dict_domain['correlation_nb'] = Domain.get_domain_total_nb_correlation(dict_domain)
|
||||||
dict_domain['father'] = domain.get_domain_father()
|
dict_domain['father'] = domain.get_domain_father()
|
||||||
|
dict_domain['languages'] = Language.get_languages_from_iso(domain.get_domain_languages(), sort=True)
|
||||||
dict_domain['tags'] = domain.get_domain_tags()
|
dict_domain['tags'] = domain.get_domain_tags()
|
||||||
dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags'])
|
dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags'])
|
||||||
dict_domain['history'] = domain.get_domain_history_with_status()
|
dict_domain['history'] = domain.get_domain_history_with_status()
|
||||||
|
@ -198,6 +203,38 @@ def domains_explorer_web():
|
||||||
dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
|
dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
|
||||||
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
|
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
|
||||||
|
|
||||||
|
@crawler_splash.route('/domains/languages/all/json', methods=['GET'])
@login_required
@login_read_only
def domains_all_languages_json():
    """Return the languages of all domains as a JSON response.

    The value of ``Domain.get_all_domains_languages()`` is passed through
    ``Language.get_languages_from_iso(..., sort=True)`` and serialised with
    ``jsonify``.
    """
    # TODO: filter by domain type. The 'iso' and 'domain_types' query
    # arguments were previously read here but never used, so they are
    # ignored until the filtering is actually implemented.
    return jsonify(Language.get_languages_from_iso(Domain.get_all_domains_languages(), sort=True))
|
||||||
|
|
||||||
|
@crawler_splash.route('/domains/languages/search_get', methods=['GET'])
@login_required
@login_read_only
def domains_search_languages_get():
    """Render the "domains by languages" search page.

    Query arguments:
        page: page number; anything missing or non-numeric falls back to 1.
        domain_types: comma-separated list of domain types.
        languages: comma-separated list of language names.

    The language names are converted with ``Language.get_iso_from_languages``
    before being handed to ``Domain.api_get_domains_by_languages``.
    """
    try:
        page = int(request.args.get('page'))
    except (TypeError, ValueError):
        # TypeError: argument absent (None); ValueError: not an integer.
        # Only these are swallowed so unrelated errors are not hidden.
        page = 1

    # Only the first occurrence of each argument is used; it carries a
    # comma-separated list.
    domains_types = request.args.getlist('domain_types')
    if domains_types:
        domains_types = domains_types[0].split(',')
    languages = request.args.getlist('languages')
    if languages:
        languages = languages[0].split(',')

    l_dict_domains = Domain.api_get_domains_by_languages(domains_types, Language.get_iso_from_languages(languages), domains_metadata=True, page=page)
    return render_template("domains/domains_filter_languages.html", template_folder='../../',
                            l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
                            current_languages=languages, domains_types=domains_types)
|
||||||
|
|
||||||
|
##-- --##
|
||||||
|
|
||||||
|
|
||||||
## Cookiejar ##
|
## Cookiejar ##
|
||||||
@crawler_splash.route('/crawler/cookiejar/add', methods=['GET'])
|
@crawler_splash.route('/crawler/cookiejar/add', methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
|
|
|
@ -148,6 +148,10 @@
|
||||||
{% include 'tags/block_obj_tags_search.html' %}
|
{% include 'tags/block_obj_tags_search.html' %}
|
||||||
{% endwith %}
|
{% endwith %}
|
||||||
|
|
||||||
|
{% with object_type='domain' %}
|
||||||
|
{% include 'domains/block_languages_search.html' %}
|
||||||
|
{% endwith %}
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -68,7 +68,7 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
{% with dict_data=dict_data, bootstrap_label=bootstrap_label %}
|
{% with l_dict_domains=dict_data['list_elem'], bootstrap_label=bootstrap_label %}
|
||||||
{% include 'domains/card_img_domain.html' %}
|
{% include 'domains/card_img_domain.html' %}
|
||||||
{% endwith %}
|
{% endwith %}
|
||||||
|
|
||||||
|
|
|
@ -67,6 +67,7 @@
|
||||||
<th>First Seen</th>
|
<th>First Seen</th>
|
||||||
<th>Last Check</th>
|
<th>Last Check</th>
|
||||||
<th>Ports</th>
|
<th>Ports</th>
|
||||||
|
<th>Languages</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
|
@ -74,6 +75,11 @@
|
||||||
<td class="panelText">{%if "first_seen" in dict_domain%}{{ dict_domain['first_seen'] }}{%endif%}</td>
|
<td class="panelText">{%if "first_seen" in dict_domain%}{{ dict_domain['first_seen'] }}{%endif%}</td>
|
||||||
<td class="panelText">{%if "last_check" in dict_domain%}{{ dict_domain['last_check'] }}{%endif%}</td>
|
<td class="panelText">{%if "last_check" in dict_domain%}{{ dict_domain['last_check'] }}{%endif%}</td>
|
||||||
<td class="panelText">{%if dict_domain["ports"]%}{{ dict_domain["ports"] }}{%endif%}</td>
|
<td class="panelText">{%if dict_domain["ports"]%}{{ dict_domain["ports"] }}{%endif%}</td>
|
||||||
|
<td class="panelText">
|
||||||
|
{% for languages in dict_domain['languages'] %}
|
||||||
|
{{languages}}
|
||||||
|
{% endfor %}
|
||||||
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
<div class="card mb-3 mt-1">
|
||||||
|
<div class="card-header text-white bg-dark">
|
||||||
|
<h5 class="card-title mb-0">
|
||||||
|
<i class="fas fa-language" style="font-size: 1.8rem;"></i> Domains by Languages :
|
||||||
|
</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
|
||||||
|
<div class="input-group">
|
||||||
|
<div class="input-group-prepend">
|
||||||
|
<button class="btn btn-outline-danger" type="button" id="button-clear" style="z-index: 1;" onclick="emptySearch()">
|
||||||
|
<i class="fas fa-eraser"></i>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<input id="llanguages" name="llanguages" type="text" class="form-control" aria-describedby="button-clear" autocomplete="off">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mb-3">
|
||||||
|
<div class="custom-control custom-switch">
|
||||||
|
<input class="custom-control-input" type="checkbox" name="domain_onion_switch" value="" id="domain_onion_switch" {%if 'onion' in domains_types%}checked{%endif%}>
|
||||||
|
<label class="custom-control-label" for="domain_onion_switch">
|
||||||
|
<span class="badge badge-danger"><i class="fas fa-user-secret"></i> Onion Domains</span>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div class="custom-control custom-switch">
|
||||||
|
<input class="custom-control-input" type="checkbox" name="domain_regular_switch" value="True" id="domain_regular_switch"{%if 'regular' in domains_types%}checked{%endif%}>
|
||||||
|
<label class="custom-control-label" for="domain_regular_switch">
|
||||||
|
<span class="badge badge-warning"><i class="fab fa-html5"></i> Web Domains</span>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<button class="btn btn-primary" type="button" id="button-search" onclick="searchLanguages()">
|
||||||
|
<i class="fas fa-search"></i> Search
|
||||||
|
</button>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
|
||||||
|
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
|
||||||
|
<script>
|
||||||
|
var llanguages;
|
||||||
|
$.getJSON("{{ url_for('crawler_splash.domains_all_languages_json') }}", //?object_type={{ object_type }}"
|
||||||
|
function(data) {
|
||||||
|
llanguages = $('#llanguages').tagSuggest({
|
||||||
|
data: data,
|
||||||
|
value: [{%if current_languages%}{% for language in current_languages %}'{{language|safe}}',{%endfor%}{%endif%}],
|
||||||
|
sortOrder: 'name',
|
||||||
|
maxDropHeight: 200,
|
||||||
|
name: 'llanguages'
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Build the search URL from the selected languages and the checked domain
// type switches, then navigate to it.
function searchLanguages() {
  var all_domain_types = ['onion', 'regular'] // TODO: load from flask
  var l_domains_types = [];

  // The widget only exists once the language list has loaded; search with
  // no language rather than throwing if the button is clicked too early.
  var data = llanguages ? llanguages.getValue() : [];
  for (var i = 0; i < all_domain_types.length; i++) {
    if (document.getElementById('domain_'+ all_domain_types[i] +'_switch').checked) {
      l_domains_types.push(all_domain_types[i])
    }
  }
  // Arrays are stringified as comma-separated lists; encode them so that
  // characters such as '&', '#' or '+' cannot corrupt the query string.
  var parameter = "?languages=" + encodeURIComponent(data) + "&domain_types=" + encodeURIComponent(l_domains_types) +"{%if page%}&page={{ page }}{%endif%}";
  window.location.href = "{{ url_for('crawler_splash.domains_search_languages_get') }}" + parameter;
}
|
||||||
|
// Clear every selected language from the search box.
function emptySearch() {
  // Nothing to clear while the widget has not been created yet.
  if (llanguages) {
    llanguages.clear();
  }
}
|
||||||
|
|
||||||
|
</script>
|
|
@ -1,10 +1,10 @@
|
||||||
{% for dict_domain in dict_data['list_elem'] %}
|
{% for dict_domain in l_dict_domains %}
|
||||||
|
|
||||||
{% if loop.index0 % 4 == 0 %}
|
{% if loop.index0 % 4 == 0 %}
|
||||||
<div class="card-deck mt-3">
|
<div class="card-deck mt-3">
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<div class="card">
|
<div class="card {% if dict_domain["status"] %}border-success{% else %}border-danger{% endif %}">
|
||||||
<div class="text-center">
|
<div class="text-center">
|
||||||
<canvas id="canvas_{{loop.index0}}" style="max-height: 400px; max-width: 100%;"></canvas>
|
<canvas id="canvas_{{loop.index0}}" style="max-height: 400px; max-width: 100%;"></canvas>
|
||||||
</div>
|
</div>
|
||||||
|
@ -13,24 +13,46 @@
|
||||||
<a target="_blank" href="{{ url_for('crawler_splash.showDomain') }}?domain={{dict_domain["id"]}}">
|
<a target="_blank" href="{{ url_for('crawler_splash.showDomain') }}?domain={{dict_domain["id"]}}">
|
||||||
{{dict_domain["id"]}}
|
{{dict_domain["id"]}}
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
|
{% if dict_domain["status"] %}
|
||||||
|
<span style="color:Green;">
|
||||||
|
<i class="fas fa-check-circle"></i> UP
|
||||||
|
</span>
|
||||||
|
{% else %}
|
||||||
|
<span style="color:Red;">
|
||||||
|
<i class="fas fa-times-circle"></i> DOWN
|
||||||
|
</span>
|
||||||
|
{% endif %}
|
||||||
</h5>
|
</h5>
|
||||||
|
<div>
|
||||||
|
<span class="badge badge-dark">
|
||||||
|
<span data-toggle="tooltip" data-placement="top" title="Tooltip on top">
|
||||||
|
<span class="badge badge-info" style="font-size: 0.8rem;">
|
||||||
|
<i class="fas fa-hourglass-start"></i>
|
||||||
|
</span>
|
||||||
|
{{dict_domain["first_seen"]}}
|
||||||
|
</span>
|
||||||
|
<span class="badge badge-light mx-1" style="font-size: 1rem;">
|
||||||
|
<i class="far fa-calendar-alt"></i>
|
||||||
|
</span>
|
||||||
|
{{dict_domain["first_seen"]}}
|
||||||
|
<span class="badge badge-secondary" style="font-size: 0.8rem;">
|
||||||
|
<i class="fas fa-hourglass-end"></i>
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
<p class="card-text">
|
<p class="card-text">
|
||||||
<small class="text-muted">
|
<small class="text-muted">
|
||||||
First seen: {{dict_domain["first_seen"]}}<br>
|
Ports: {{dict_domain["ports"]}}<br>
|
||||||
Last_seen: {{dict_domain["first_seen"]}}<br>
|
{% if dict_domain['languages'] %}
|
||||||
Ports: {{dict_domain["ports"]}}
|
Languages:
|
||||||
|
{% for language in dict_domain['languages'] %}
|
||||||
|
<span class="badge badge-secondary" style="font-size: 0.75rem;">{{ language }}</span>
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
</small>
|
</small>
|
||||||
</p>
|
</p>
|
||||||
<small class="text-muted">Status: </small>
|
|
||||||
{% if dict_domain["status"] %}
|
|
||||||
<span style="color:Green;">
|
|
||||||
<i class="fas fa-check-circle"></i> UP
|
|
||||||
</span>
|
|
||||||
{% else %}
|
|
||||||
<span style="color:Red;">
|
|
||||||
<i class="fas fa-times-circle"></i> DOWN
|
|
||||||
</span>
|
|
||||||
{% endif %}
|
|
||||||
<div>
|
<div>
|
||||||
{% for tag in dict_domain['tags'] %}
|
{% for tag in dict_domain['tags'] %}
|
||||||
<a href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type=domain<ags={{ tag }}">
|
<a href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type=domain<ags={{ tag }}">
|
||||||
|
@ -50,6 +72,6 @@
|
||||||
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
{% if dict_data['list_elem']|length % 4 != 0 %}
|
{% if l_dict_domains|length % 4 != 0 %}
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
|
@ -0,0 +1,192 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Show Domain - AIL</title>
|
||||||
|
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
|
||||||
|
<!-- Core CSS -->
|
||||||
|
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||||
|
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||||
|
<!-- JS -->
|
||||||
|
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/bootstrap4.min.js') }}"></script>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.card-columns {
|
||||||
|
column-count: 4;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
{% include 'nav_bar.html' %}
|
||||||
|
|
||||||
|
<div class="container-fluid">
|
||||||
|
<div class="row">
|
||||||
|
|
||||||
|
{% include 'crawler/menu_sidebar.html' %}
|
||||||
|
|
||||||
|
<div class="col-12 col-lg-10" id="core_content">
|
||||||
|
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-12 col-lg-6">
|
||||||
|
|
||||||
|
{% include 'domains/block_languages_search.html' %}
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div class="col-12 col-xl-6">
|
||||||
|
|
||||||
|
<div class="card my-2 border-secondary" >
|
||||||
|
<div class="card-body py-2">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-3 text-center">
|
||||||
|
<button class="btn btn-primary" onclick="blocks.value=0;pixelate_all();">
|
||||||
|
<i class="fas fa-eye-slash"></i>
|
||||||
|
<span class="label-icon">Hide</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-6">
|
||||||
|
<input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="5">
|
||||||
|
</div>
|
||||||
|
<div class="col-md-3 text-center">
|
||||||
|
<button class="btn btn-primary" onclick="blocks.value=50;pixelate_all();">
|
||||||
|
<i class="fas fa-plus-square"></i>
|
||||||
|
<span class="label-icon">Full resolution</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
{% with l_dict_domains=l_dict_domains['list_elem'], bootstrap_label=bootstrap_label %}
|
||||||
|
{% include 'domains/card_img_domain.html' %}
|
||||||
|
{% endwith %}
|
||||||
|
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
|
||||||
|
{%if l_dict_domains['list_elem']%}
|
||||||
|
{% with page=l_dict_domains['page'], nb_page_max=l_dict_domains['nb_pages'], nb_first_elem=l_dict_domains['nb_first_elem'], nb_last_elem=l_dict_domains['nb_last_elem'], nb_all_elem=l_dict_domains['nb_all_elem'] %}
|
||||||
|
{% set target_url=url_for('crawler_splash.domains_search_languages_get') + "?languages=" + ','.join(current_languages)%}
|
||||||
|
{%if domains_types %}
|
||||||
|
{% set target_url = target_url + '&domain_types=' + ','.join(domains_types)%}
|
||||||
|
{%endif%}
|
||||||
|
{% include 'pagination.html' %}
|
||||||
|
{% endwith %}
|
||||||
|
{%endif%}
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
|
||||||
|
<script>
|
||||||
|
$(document).ready(function(){
|
||||||
|
$('#nav_title_domains_explorer').removeClass("text-muted");
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
// Show or hide the side navigation menu and adjust the column classes of
// the side menu and the main content accordingly.
function toggle_sidebar(){
  var menu_is_shown = $('#nav_menu').is(':visible');
  if (menu_is_shown) {
    $('#nav_menu').hide();
    $('#side_menu').removeClass('border-right').removeClass('col-lg-2');
    $('#core_content').removeClass('col-lg-10');
    return;
  }
  $('#nav_menu').show();
  $('#side_menu').addClass('border-right').addClass('col-lg-2');
  $('#core_content').addClass('col-lg-10');
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
|
||||||
|
<script>
|
||||||
|
|
||||||
|
// img_url
|
||||||
|
// ctx
|
||||||
|
// canevas_id
|
||||||
|
|
||||||
|
var dict_canevas_blurr_img = {}
|
||||||
|
|
||||||
|
// Register a canvas in dict_canevas_blurr_img with its 2d context and a
// freshly created Image, then start loading img_url into that image.
function init_canevas_blurr_img(canevas_id, img_url){
  var entry = {};
  dict_canevas_blurr_img[canevas_id] = entry;

  // 2d context with image smoothing switched off
  var context_2d = document.getElementById(canevas_id).getContext('2d');
  context_2d.webkitImageSmoothingEnabled = false;
  context_2d.imageSmoothingEnabled = false;
  entry["ctx"] = context_2d;

  // image: draw once loaded, run img_error if loading fails
  var picture = new Image();
  entry["img"] = picture;
  picture.onload = function() {pixelate_img(canevas_id);};
  picture.addEventListener("error", function() {img_error(canevas_id);});
  // set src last so both handlers are attached before loading starts
  picture.src = img_url;
}
|
||||||
|
|
||||||
|
// Redraw every canvas registered in dict_canevas_blurr_img.
function pixelate_all(){
  Object.keys(dict_canevas_blurr_img).forEach(function(canevas_id) {
    pixelate_img(canevas_id);
  });
}
|
||||||
|
|
||||||
|
// Redraw one registered canvas at the resolution selected by the 'blocks'
// slider: the image is drawn scaled down, then that small region is
// stretched back over the whole canvas.
function pixelate_img(canevas_id) {
  if (typeof canevas_id === 'undefined') {
    return;
  }

  var canevas_to_blurr = document.getElementById(canevas_id);
  var entry = dict_canevas_blurr_img[canevas_id];

  /// use slider value: 50 means full size, otherwise a fraction of it
  var size = (blocks.value == 50) ? 1 : (blocks.value) * 0.01;

  canevas_to_blurr.width = entry["img"].width;
  canevas_to_blurr.height = entry["img"].height;

  /// cache scaled width and height (declared locally so they no longer
  /// leak as implicit globals)
  var w = canevas_to_blurr.width * size;
  var h = canevas_to_blurr.height * size;

  /// draw original image to the scaled size
  entry["ctx"].drawImage(entry["img"], 0, 0, w, h);

  /// pixelated
  entry["ctx"].drawImage(canevas_to_blurr, 0, 0, w, h, 0, 0, canevas_to_blurr.width, canevas_to_blurr.height);
}
|
||||||
|
|
||||||
|
// Image load failure handler: clear the onerror property, then point the
// image at the static AIL placeholder.
function img_error(canevas_id) {
  var failed_img = dict_canevas_blurr_img[canevas_id]["img"];
  failed_img.onerror = null;
  failed_img.src = "{{ url_for('static', filename='image/AIL.png') }}";
}
|
||||||
|
|
||||||
|
blocks.addEventListener('change', pixelate_all, false);
|
||||||
|
|
||||||
|
{% for dict_domain in l_dict_domains['list_elem'] %}
|
||||||
|
{% if 'screenshot' in dict_domain %}
|
||||||
|
{% if dict_domain['is_tags_safe'] %}
|
||||||
|
var screenshot_url = "{{ url_for('showsavedpastes.screenshot', filename="") }}{{dict_domain['screenshot']}}";
|
||||||
|
{% else %}
|
||||||
|
var screenshot_url = "{{ url_for('static', filename='image/AIL.png') }}";
|
||||||
|
{% endif %}
|
||||||
|
init_canevas_blurr_img("canvas_{{loop.index0}}", screenshot_url);
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
</script>
|
||||||
|
|
||||||
|
</html>
|
Loading…
Reference in New Issue