mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			69 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			69 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""Watchdog script for the Splash manager used by the AIL crawlers.

On start-up the script pings the Splash manager and, when it answers,
reloads the splash/proxies list and relaunches the crawlers if
``crawlers.test_ail_crawlers()`` succeeds.  It then loops forever: about
once per ``MANAGER_CHECK_INTERVAL`` seconds it re-checks the connection
and, when the manager reports a session uuid different from the last one
that was successfully loaded, it reloads the list and relaunches the
crawlers again.

NOTE(review): the exact semantics of the ``crawlers`` helpers are defined
in the project's ``lib/crawlers`` module and are not visible here; the
description above is derived only from how their return values are used.
"""

import os
import sys
import time

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import ConfigLoader
import crawlers

config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
# Drop the loader once the connection has been created.
config_loader = None

# Minimum number of seconds between two Splash manager connection checks.
MANAGER_CHECK_INTERVAL = 60
# Seconds slept on every loop iteration in which no check is due.
IDLE_SLEEP = 5

# # TODO: launch me in core screen
# # TODO: check if already launched in tor screen

# # TODO: handle multiple splash_manager
if __name__ == '__main__':

    # uuid of the last manager session whose splash/proxies list was loaded
    # successfully; None until a reload has succeeded.
    session_uuid = None

    is_manager_connected = crawlers.ping_splash_manager()
    if not is_manager_connected:
        print('Error, Can\'t connect to Splash manager')
    else:
        print('Splash manager connected')
        current_session_uuid = crawlers.get_splash_manager_session_uuid()
        is_manager_connected = crawlers.reload_splash_and_proxies_list()
        print(is_manager_connected)
        if is_manager_connected:
            if crawlers.test_ail_crawlers():
                crawlers.relaunch_crawlers()
            # Remember the session only after a successful reload (same rule
            # as in the loop below); otherwise a failed start-up reload would
            # never be retried for this manager session.
            session_uuid = current_session_uuid
    last_check = int(time.time())

    while True:

        # # TODO: avoid multiple ping

        # check if manager is connected
        if int(time.time()) - last_check > MANAGER_CHECK_INTERVAL:
            is_manager_connected = crawlers.is_splash_manager_connected()
            current_session_uuid = crawlers.get_splash_manager_session_uuid()
            # reload proxy and splash list when the manager session changed
            if current_session_uuid and current_session_uuid != session_uuid:
                is_manager_connected = crawlers.reload_splash_and_proxies_list()
                if is_manager_connected:
                    print('reload proxies and splash list')
                    if crawlers.test_ail_crawlers():
                        crawlers.relaunch_crawlers()
                    session_uuid = current_session_uuid
            if not is_manager_connected:
                print('Error, Can\'t connect to Splash manager')
            last_check = int(time.time())

            # # TODO: launch crawlers if was never connected
        # # TODO: periodically refresh the splash and proxy list
        #         (previously a disabled, unreachable `elif False:` branch)
        else:
            time.sleep(IDLE_SLEEP)

        # kill/launch new crawler / crawler manager check if already launched

    # # TODO: handle multiple splash_manager
    # catch reload request