chg: [screenshot + har directory] add option to change screenshots directory

pull/549/head
Terrtia 2021-01-08 17:37:18 +01:00
parent 054d15a446
commit 11d537e2eb
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
8 changed files with 32 additions and 12 deletions

View File

@ -41,6 +41,15 @@ class ConfigLoader(object):
db=self.cfg.getint(redis_name, "db"), db=self.cfg.getint(redis_name, "db"),
decode_responses=decode_responses ) decode_responses=decode_responses )
def get_files_directory(self, key_name):
directory_path = self.cfg.get('Directories', key_name)
# full path
if directory_path[0] == '/':
return directory_path
else:
directory_path = os.path.join(os.environ['AIL_HOME'], directory_path)
return directory_path
def get_config_str(self, section, key_name): def get_config_str(self, section, key_name):
return self.cfg.get(section, key_name) return self.cfg.get(section, key_name)

View File

@ -20,7 +20,7 @@ import ConfigLoader
config_loader = ConfigLoader.ConfigLoader() config_loader = ConfigLoader.ConfigLoader()
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
config_loader = None config_loader = None
# get screenshot relative path # get screenshot relative path

View File

@ -60,15 +60,14 @@ class HiddenServices(object):
self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
self.paste_crawled_directory = os.path.join(self.paste_directory, config_loader.get_config_str("Directories", "crawled")) self.paste_crawled_directory = os.path.join(self.paste_directory, config_loader.get_config_str("Directories", "crawled"))
self.paste_crawled_directory_name = config_loader.get_config_str("Directories", "crawled") self.paste_crawled_directory_name = config_loader.get_config_str("Directories", "crawled")
self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) self.screenshot_directory = config_loader.get_files_directory('screenshot')
self.screenshot_directory_screenshot = os.path.join(self.screenshot_directory, 'screenshot')
elif type == 'i2p': elif type == 'i2p':
self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) self.screenshot_directory = config_loader.get_files_directory('screenshot')
else: else:
## TODO: # FIXME: add error ## TODO: # FIXME: add error
pass pass
config_loader = None config_loader = None
#def remove_absolute_path_link(self, key, value): #def remove_absolute_path_link(self, key, value):

View File

@ -34,7 +34,8 @@ PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
r_cache = config_loader.get_redis_conn("Redis_Cache") r_cache = config_loader.get_redis_conn("Redis_Cache")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) screenshot_directory = config_loader.get_files_directory('screenshot')
har_directory = config_loader.get_files_directory('har')
config_loader = None config_loader = None
@ -388,7 +389,7 @@ def get_item_screenshot(item_id):
return '' return ''
def get_item_har_name(item_id): def get_item_har_name(item_id):
har_path = os.path.join(screenshot_directory, item_id) + '.json' har_path = os.path.join(har_directory, item_id) + '.json'
if os.path.isfile(har_path): if os.path.isfile(har_path):
return har_path return har_path
else: else:

View File

@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
from Helper import Process from Helper import Process
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
#import ConfigLoader import ConfigLoader
import Screenshot import Screenshot
import crawlers import crawlers
@ -133,7 +133,11 @@ class TorSplashCrawler():
config_section = 'Crawler' config_section = 'Crawler'
self.p = Process(config_section) self.p = Process(config_section)
self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str ) self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str )
self.har_dir = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str )
config_loader = ConfigLoader.ConfigLoader()
self.har_dir = os.path.join(config_loader.get_files_directory('har') , date_str )
config_loader = None
self.r_serv_log_submit = redis.StrictRedis( self.r_serv_log_submit = redis.StrictRedis(
host=self.p.config.get("Redis_Log_submit", "host"), host=self.p.config.get("Redis_Log_submit", "host"),
port=self.p.config.getint("Redis_Log_submit", "port"), port=self.p.config.getint("Redis_Log_submit", "port"),

View File

@ -4,7 +4,8 @@ dicofilters = Dicos
pastes = PASTES pastes = PASTES
hash = HASHS hash = HASHS
crawled = crawled crawled = crawled
crawled_screenshot = CRAWLED_SCREENSHOT har = CRAWLED_SCREENSHOT
screenshot = CRAWLED_SCREENSHOT/screenshot
wordtrending_csv = var/www/static/csv/wordstrendingdata wordtrending_csv = var/www/static/csv/wordstrendingdata
wordsfile = files/wordfile wordsfile = files/wordfile
@ -221,6 +222,11 @@ host = localhost
port = 6382 port = 6382
db = 10 db = 10
[Kvrocks_Meta]
host = localhost
port = 6383
db = 0
[Url] [Url]
cc_critical = DE cc_critical = DE
@ -278,6 +284,7 @@ default_crawler_closespider_pagecount = 50
default_crawler_user_agent = Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0 default_crawler_user_agent = Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0
splash_url = http://127.0.0.1 splash_url = http://127.0.0.1
splash_port = 8050-8052 splash_port = 8050-8052
domain_proxy = onion.foundation
[IP] [IP]
# list of comma-separated CIDR that you wish to be alerted for. e.g: # list of comma-separated CIDR that you wish to be alerted for. e.g:

View File

@ -33,7 +33,7 @@ if __name__ == '__main__':
config_loader = ConfigLoader.ConfigLoader() config_loader = ConfigLoader.ConfigLoader()
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
NEW_SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') NEW_SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'

View File

@ -100,7 +100,7 @@ dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_me
UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted')
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
REPO_ORIGIN = 'https://github.com/ail-project/ail-framework.git' REPO_ORIGIN = 'https://github.com/ail-project/ail-framework.git'