2020-06-09 18:33:41 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
|
|
|
|
import os
|
|
|
|
import sys
|
2020-07-24 08:54:54 +02:00
|
|
|
import time
|
2020-06-09 18:33:41 +02:00
|
|
|
|
|
|
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
|
|
|
import ConfigLoader
|
|
|
|
import crawlers
|
|
|
|
|
|
|
|
# Module-level setup: build a ConfigLoader only long enough to obtain the
# "ARDB_Metadata" Redis connection.
config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
# The loader is not needed past this point; rebind the name to None
# (presumably so the loader object can be released -- confirm).
config_loader = None
|
|
|
|
|
2020-07-27 15:46:09 +02:00
|
|
|
# # TODO: launch me in core screen
|
|
|
|
# # TODO: check if already launched in tor screen
|
|
|
|
|
2020-07-24 08:54:54 +02:00
|
|
|
# # TODO: handle multiple splash_manager
|
|
|
|
if __name__ == '__main__':
    # Supervise the connection to the Splash manager:
    #   1. at startup, ping the manager, load the splash/proxy list and
    #      relaunch the crawlers if the crawler self-test passes;
    #   2. then, about once a minute, re-check the connection and reload the
    #      list whenever the manager reports a new session UUID.
    #
    # # TODO: handle multiple splash_manager
    # # TODO: catch reload request (refresh splash and proxy list on demand)

    is_manager_connected = crawlers.ping_splash_manager()
    if is_manager_connected:
        print('Splash manager connected')
        session_uuid = crawlers.get_splash_manager_session_uuid()
        is_manager_connected = crawlers.reload_splash_and_proxies_list()
        print(is_manager_connected)
        # Relaunch the crawlers only when the list was loaded and the
        # crawler self-test succeeds.
        if is_manager_connected and crawlers.test_ail_crawlers():
            crawlers.relaunch_crawlers()
    else:
        print('Error, Can\'t connect to Splash manager')
        # No session known yet: any non-empty UUID reported later differs
        # from None and therefore triggers a reload in the loop below.
        session_uuid = None
    last_check = int(time.time())

    while True:
        # # TODO: avoid multiple ping
        # # TODO: launch crawlers if was never connected
        # # TODO: kill/launch new crawler / crawler manager check if already launched

        # Less than a minute since the last check: idle and poll again.
        if int(time.time()) - last_check <= 60:
            time.sleep(5)
            continue

        # check if manager is connected
        is_manager_connected = crawlers.is_splash_manager_connected()
        current_session_uuid = crawlers.get_splash_manager_session_uuid()

        # reload proxy and splash list when the manager session changed
        if current_session_uuid and current_session_uuid != session_uuid:
            is_manager_connected = crawlers.reload_splash_and_proxies_list()
            if is_manager_connected:
                print('reload proxies and splash list')
                if crawlers.test_ail_crawlers():
                    crawlers.relaunch_crawlers()
                # Remember the session only after a successful reload, so a
                # failed reload is retried on the next check.
                session_uuid = current_session_uuid

        if not is_manager_connected:
            print('Error, Can\'t connect to Splash manager')
        last_check = int(time.time())