mirror of https://github.com/CIRCL/AIL-framework
chg: [onion module] filter onion v2
parent
faea17572c
commit
9d26a47c17
|
@ -136,16 +136,17 @@ def is_valid_onion_v3_domain(domain):
|
||||||
def is_valid_onion_domain(domain):
|
def is_valid_onion_domain(domain):
|
||||||
if not domain.endswith('.onion'):
|
if not domain.endswith('.onion'):
|
||||||
return False
|
return False
|
||||||
domain = domain.replace('.onion', '', 1)
|
return is_valid_onion_v3_domain(domain)
|
||||||
if len(domain) == 16: # v2 address
|
# domain = domain.replace('.onion', '', 1)
|
||||||
r_onion = r'[a-z0-9]{16}'
|
# if len(domain) == 16: # v2 address
|
||||||
if re.match(r_onion, domain):
|
# r_onion = r'[a-z0-9]{16}'
|
||||||
return True
|
# if re.match(r_onion, domain):
|
||||||
elif len(domain) == 56: # v3 address
|
# return True
|
||||||
r_onion = r'[a-z0-9]{56}'
|
# elif len(domain) == 56: # v3 address
|
||||||
if re.fullmatch(r_onion, domain):
|
# r_onion = r'[a-z0-9]{56}'
|
||||||
return True
|
# if re.fullmatch(r_onion, domain):
|
||||||
return False
|
# return True
|
||||||
|
# return False
|
||||||
|
|
||||||
def is_valid_domain(domain):
|
def is_valid_domain(domain):
|
||||||
faup.decode(domain)
|
faup.decode(domain)
|
||||||
|
|
|
@ -23,7 +23,6 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from modules.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib.objects.Items import Item
|
|
||||||
from lib import crawlers
|
from lib import crawlers
|
||||||
|
|
||||||
class Onion(AbstractModule):
|
class Onion(AbstractModule):
|
||||||
|
@ -35,9 +34,9 @@ class Onion(AbstractModule):
|
||||||
config_loader = ConfigLoader()
|
config_loader = ConfigLoader()
|
||||||
self.r_cache = config_loader.get_redis_conn("Redis_Cache")
|
self.r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||||
|
|
||||||
self.pending_seconds = config_loader.get_config_int("Onion", "max_execution_time")
|
self.pending_seconds = 10
|
||||||
# regex timeout
|
# regex timeout
|
||||||
self.regex_timeout = 30
|
self.regex_timeout = config_loader.get_config_int("Onion", "max_execution_time")
|
||||||
|
|
||||||
self.faup = crawlers.get_faup()
|
self.faup = crawlers.get_faup()
|
||||||
|
|
||||||
|
@ -80,6 +79,7 @@ class Onion(AbstractModule):
|
||||||
# String to tuple
|
# String to tuple
|
||||||
x = x[2:-2].replace(" '", "").split("',")
|
x = x[2:-2].replace(" '", "").split("',")
|
||||||
url = x[0]
|
url = x[0]
|
||||||
|
url = url.lower()
|
||||||
print(url)
|
print(url)
|
||||||
|
|
||||||
# TODO Crawl subdomain
|
# TODO Crawl subdomain
|
||||||
|
@ -108,5 +108,4 @@ class Onion(AbstractModule):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
module = Onion()
|
module = Onion()
|
||||||
# module.compute('submitted/2022/10/10/submitted_705d1d92-7e9a-4a44-8c21-ccd167bfb7db.gz 9')
|
|
||||||
module.run()
|
module.run()
|
||||||
|
|
Loading…
Reference in New Issue