2021-05-17 18:03:30 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import unittest
|
|
|
|
|
2021-05-27 17:28:20 +02:00
|
|
|
import gzip
|
|
|
|
from base64 import b64encode
|
|
|
|
|
2021-05-17 18:03:30 +02:00
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
|
|
|
|
|
|
# Modules Classes
|
2021-06-02 14:42:23 +02:00
|
|
|
from modules.ApiKey import ApiKey
|
|
|
|
from modules.Categ import Categ
|
|
|
|
from modules.CreditCards import CreditCards
|
|
|
|
from modules.DomClassifier import DomClassifier
|
|
|
|
from modules.Global import Global
|
|
|
|
from modules.Keys import Keys
|
|
|
|
from modules.Onion import Onion
|
2021-05-17 18:03:30 +02:00
|
|
|
|
2021-05-19 14:54:34 +02:00
|
|
|
# project packages
|
2021-05-17 18:03:30 +02:00
|
|
|
import lib.crawlers as crawlers
|
2021-05-27 17:28:20 +02:00
|
|
|
import packages.Item as Item
|
2021-05-17 18:03:30 +02:00
|
|
|
|
2021-05-19 14:54:34 +02:00
|
|
|
class Test_Module_ApiKey(unittest.TestCase):
    """Run the ApiKey module on a test item and check the keys it reports."""

    def setUp(self):
        # Fresh module instance for every test method.
        self.module_obj = ApiKey()

    def test_module(self):
        item_id = 'tests/2021/01/01/api_keys.gz'
        # Expected content of matches[0], matches[1] and matches[2], in that order.
        expected_by_position = (
            ['AIza00000000000000000000000_example-KEY'],   # Google API key
            ['AKIAIOSFODNN7EXAMPLE'],                      # AWS access key
            ['wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'],  # AWS secret key
        )

        # The message is "<item id> 3"; the trailing number is presumably a
        # score expected by the module -- TODO confirm against ApiKey.compute.
        matches = self.module_obj.compute(f'{item_id} 3', r_result=True)

        for position, expected_keys in enumerate(expected_by_position):
            self.assertCountEqual(matches[position], expected_keys)
|
|
|
|
|
2021-05-19 16:57:20 +02:00
|
|
|
class Test_Module_Categ(unittest.TestCase):
    """Run the Categ module on a test item and check the reported categories."""

    def setUp(self):
        # Fresh module instance for every test method.
        self.module_obj = Categ()

    def test_module(self):
        expected_categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve']

        found = self.module_obj.compute('tests/2021/01/01/categ.gz', r_result=True)

        # assertCountEqual ignores ordering, so only the set of categories
        # (and their multiplicity) has to match.
        self.assertCountEqual(found, expected_categories)
|
|
|
|
|
2021-05-27 17:28:20 +02:00
|
|
|
class Test_Module_CreditCards(unittest.TestCase):
    """Run the CreditCards module on a test item and check the extracted numbers."""

    def setUp(self):
        # Fresh module instance for every test method.
        self.module_obj = CreditCards()

    def test_module(self):
        # The message carries the item id followed by " 7"; the number is
        # presumably a score expected by the module -- TODO confirm.
        message = 'tests/2021/01/01/credit_cards.gz 7'
        expected_cards = [
            '341039324930797',   # American Express
            '6011613905509166',  # Discover Card
            '3547151714018657',  # Japan Credit Bureau (JCB)
            '5492981206527330',  # 16 digits MasterCard
            '4024007132849695',  # '4532525919781' # 16-digit VISA, with separators
        ]

        found_cards = self.module_obj.compute(message, r_result=True)

        # Order-insensitive comparison of the extracted card numbers.
        self.assertCountEqual(found_cards, expected_cards)
|
|
|
|
|
|
|
|
class Test_Module_DomClassifier(unittest.TestCase):
    """Run the DomClassifier module on a test item and expect a non-empty result."""

    def setUp(self):
        # Fresh module instance for every test method.
        self.module_obj = DomClassifier()

    def test_module(self):
        classified = self.module_obj.compute(
            'tests/2021/01/01/domain_classifier.gz',
            r_result=True,
        )
        # Only the presence of at least one entry is checked, not its content.
        entry_count = len(classified)
        self.assertTrue(entry_count)
|
|
|
|
|
|
|
|
class Test_Module_Global(unittest.TestCase):
    """Feed the Global module a new item, a duplicate, and a same-id item with new content."""

    def setUp(self):
        # Fresh module instance for every test method.
        self.module_obj = Global()

    def test_module(self):
        # # TODO: delete item
        item_id = 'tests/2021/01/01/global.gz'
        # Remove any item with this id before starting (presumably left over
        # from an earlier run -- this test does not delete it at the end).
        Item.Item(item_id).delete()

        base_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
        # Payloads are gzip-compressed and then base64-encoded into text.
        encoded_first = b64encode(gzip.compress(base_content)).decode()
        encoded_second = b64encode(gzip.compress(base_content + b' more text')).decode()
        first_message = f'{item_id} {encoded_first}'

        # Test new item
        result = self.module_obj.compute(first_message, r_result=True)
        print(result)
        self.assertEqual(result, item_id)

        # Test duplicate
        result = self.module_obj.compute(first_message, r_result=True)
        print(result)
        self.assertIsNone(result)

        # Test same id with != content
        second_message = f'{item_id} {encoded_second}'
        result = self.module_obj.compute(second_message, r_result=True)
        print(result)
        # The returned id must still contain the original id minus its last
        # three characters ('.gz'), yet differ from the original id.
        self.assertIn(item_id[:-3], result)
        self.assertNotEqual(result, item_id)

        # cleanup
        Item.Item(result).delete()
        # # TODO: remove from queue
|
|
|
|
|
2021-05-28 17:23:51 +02:00
|
|
|
class Test_Module_Keys(unittest.TestCase):
    """Smoke test for the Keys module: compute() must run without raising."""

    def setUp(self):
        # Fresh module instance for every test method.
        self.module_obj = Keys()

    def test_module(self):
        item_id = 'tests/2021/01/01/keys.gz'
        # # TODO: check results
        # No assertion yet: the return value is deliberately not bound, since
        # nothing inspects it. The test only fails if compute() raises.
        self.module_obj.compute(item_id)
|
|
|
|
|
2021-05-17 18:03:30 +02:00
|
|
|
class Test_Module_Onion(unittest.TestCase):
    """Run the Onion module on a test item and check the crawler queues it fills."""

    def setUp(self):
        # Fresh module instance for every test method.
        self.module_obj = Onion()

    def test_module(self):
        item_id = 'tests/2021/01/01/onion.gz'
        domain_1 = 'eswpccgr5xyovsahffkehgleqthrasfpfdblwbs4lstd345dwq5qumqd.onion'
        domain_2 = 'www.facebookcorewwwi.onion'

        # Clear queue entries for this domain/item before the module runs.
        crawlers.queue_test_clean_up('onion', domain_1, item_id)

        # The message is "<item id> 3"; the trailing number is presumably a
        # score expected by the module -- TODO confirm against Onion.compute.
        self.module_obj.compute(f'{item_id} 3')

        # # TODO: check warning logs when the crawler is not activated
        if crawlers.is_crawler_activated():
            # clean DB: registered before the assertions so the queue entries
            # are removed even when one of the checks below fails.
            self.addCleanup(crawlers.queue_test_clean_up, 'onion', domain_1, item_id)

            ## check domain queues
            # all domains queue
            self.assertTrue(crawlers.is_domain_in_queue('onion', domain_1))
            # all url/item queue
            self.assertTrue(crawlers.is_item_in_queue('onion', f'http://{domain_1}', item_id))
            # domain blacklist
            self.assertFalse(crawlers.is_domain_in_queue('onion', domain_2))
            # invalid onion
            self.assertFalse(crawlers.is_domain_in_queue('onion', 'invalid.onion'))
|
2021-05-19 14:54:34 +02:00
|
|
|
|
|
|
|
# Run every test case in this file when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|