mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			355 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			355 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python3
 | |
| # -*-coding:UTF-8 -*
 | |
| 
 | |
| '''
 | |
| 
 | |
| This module can be use to see information of running modules.
 | |
| These information are logged in "logs/moduleInfo.log"
 | |
| 
 | |
| It can also try to manage them by killing inactive one.
 | |
| However, it does not support mutliple occurence of the same module
 | |
| (It will kill the first one obtained by get)
 | |
| 
 | |
| 
 | |
| '''
 | |
| 
 | |
| import time
 | |
| import datetime
 | |
| import redis
 | |
| import os
 | |
| import signal
 | |
| import argparse
 | |
| from subprocess import PIPE, Popen
 | |
| import configparser
 | |
| import json
 | |
| from terminaltables import AsciiTable
 | |
| import textwrap
 | |
| from colorama import Fore, Back, Style, init
 | |
| import curses
 | |
| 
 | |
| # CONFIG VARIABLES
 | |
| kill_retry_threshold = 60 #1m
 | |
| log_filename = "../logs/moduleInfo.log"
 | |
| command_search_pid = "ps a -o pid,cmd | grep {}"
 | |
| command_search_name = "ps a -o pid,cmd | grep {}"
 | |
| command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
 | |
| 
 | |
| init() #Necesary for colorama
 | |
| printarrayGlob = [None]*14
 | |
| printarrayGlob.insert(0, ["Time", "Module", "PID", "Action"])
 | |
| lastTimeKillCommand = {}
 | |
| 
 | |
| #Curses init
 | |
| #stdscr = curses.initscr()
 | |
| #curses.cbreak()
 | |
| #stdscr.keypad(1)
 | |
| 
 | |
| # GLOBAL
 | |
| last_refresh = 0
 | |
| 
 | |
| 
 | |
| def getPid(module):
 | |
|     p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
 | |
|     for line in p.stdout:
 | |
|         print(line)
 | |
|         splittedLine = line.split()
 | |
|         if 'python2' in splittedLine:
 | |
|             return int(splittedLine[0])
 | |
|     return None
 | |
| 
 | |
| def clearRedisModuleInfo():
 | |
|     for k in server.keys("MODULE_*"):
 | |
|         server.delete(k)
 | |
|     inst_time = datetime.datetime.fromtimestamp(int(time.time()))
 | |
|     printarrayGlob.insert(1, [inst_time, "*", "-", "Cleared redis module info"])
 | |
|     printarrayGlob.pop()
 | |
| 
 | |
| def cleanRedis():
 | |
|     for k in server.keys("MODULE_TYPE_*"):
 | |
|         moduleName = k[12:].split('_')[0]
 | |
|         for pid in server.smembers(k):
 | |
|             flag_pid_valid = False
 | |
|             proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
 | |
|             for line in proc.stdout:
 | |
|                 splittedLine = line.split()
 | |
|                 if ('python2' in splittedLine or 'python' in splittedLine) and "./"+moduleName+".py" in splittedLine:
 | |
|                     flag_pid_valid = True
 | |
| 
 | |
|             if not flag_pid_valid:
 | |
|                 print(flag_pid_valid, 'cleaning', pid, 'in', k)
 | |
|                 server.srem(k, pid)
 | |
|                 inst_time = datetime.datetime.fromtimestamp(int(time.time()))
 | |
|                 printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k])
 | |
|                 printarrayGlob.pop()
 | |
|                 #time.sleep(5)
 | |
| 
 | |
| 
 | |
| def kill_module(module, pid):
 | |
|     print('')
 | |
|     print('-> trying to kill module:', module)
 | |
| 
 | |
|     if pid is None:
 | |
|         print('pid was None')
 | |
|         printarrayGlob.insert(1, [0, module, pid, "PID was None"])
 | |
|         printarrayGlob.pop()
 | |
|         pid = getPid(module)
 | |
|     else: #Verify that the pid is at least in redis
 | |
|         if server.exists("MODULE_"+module+"_"+str(pid)) == 0:
 | |
|             return
 | |
| 
 | |
|     lastTimeKillCommand[pid] = int(time.time())
 | |
|     if pid is not None:
 | |
|         try:
 | |
|             os.kill(pid, signal.SIGUSR1)
 | |
|         except OSError:
 | |
|             print(pid, 'already killed')
 | |
|             inst_time = datetime.datetime.fromtimestamp(int(time.time()))
 | |
|             printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"])
 | |
|             printarrayGlob.pop()
 | |
|             return
 | |
|         time.sleep(1)
 | |
|         if getPid(module) is None:
 | |
|             print(module, 'has been killed')
 | |
|             print('restarting', module, '...')
 | |
|             p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
 | |
|             inst_time = datetime.datetime.fromtimestamp(int(time.time()))
 | |
|             printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
 | |
|             printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"])
 | |
|             printarrayGlob.pop()
 | |
|             printarrayGlob.pop()
 | |
| 
 | |
|         else:
 | |
|             print('killing failed, retrying...')
 | |
|             inst_time = datetime.datetime.fromtimestamp(int(time.time()))
 | |
|             printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."])
 | |
|             printarrayGlob.pop()
 | |
| 
 | |
|             time.sleep(1)
 | |
|             os.kill(pid, signal.SIGUSR1)
 | |
|             time.sleep(1)
 | |
|             if getPid(module) is None:
 | |
|                 print(module, 'has been killed')
 | |
|                 print('restarting', module, '...')
 | |
|                 p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
 | |
|                 inst_time = datetime.datetime.fromtimestamp(int(time.time()))
 | |
|                 printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
 | |
|                 printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"])
 | |
|                 printarrayGlob.pop()
 | |
|                 printarrayGlob.pop()
 | |
|             else:
 | |
|                 print('killing failed!')
 | |
|                 inst_time = datetime.datetime.fromtimestamp(int(time.time()))
 | |
|                 printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"])
 | |
|                 printarrayGlob.pop()
 | |
|     else:
 | |
|         print('Module does not exist')
 | |
|         inst_time = datetime.datetime.fromtimestamp(int(time.time()))
 | |
|         printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"])
 | |
|         printarrayGlob.pop()
 | |
|     #time.sleep(5)
 | |
|     cleanRedis()
 | |
| 
 | |
| def get_color(time, idle):
 | |
|     if time is not None:
 | |
|         temp = time.split(':')
 | |
|         time = int(temp[0])*3600 + int(temp[1])*60 + int(temp[2])
 | |
| 
 | |
|         if time >= args.treshold:
 | |
|             if not idle:
 | |
|                 return Back.RED + Style.BRIGHT
 | |
|             else:
 | |
|                 return Back.MAGENTA + Style.BRIGHT
 | |
|         elif time > args.treshold/2:
 | |
|             return Back.YELLOW + Style.BRIGHT
 | |
|         else:
 | |
|             return Back.GREEN + Style.BRIGHT
 | |
|     else:
 | |
|         return Style.RESET_ALL
 | |
| 
 | |
| def waiting_refresh():
 | |
|     global last_refresh
 | |
|     if time.time() - last_refresh < args.refresh:
 | |
|         return False
 | |
|     else:
 | |
|         last_refresh = time.time()
 | |
|         return True
 | |
| 
 | |
| 
 | |
| 
 | |
| if __name__ == "__main__":
 | |
| 
 | |
|     parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one.')
 | |
|     parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate')
 | |
|     parser.add_argument('-t', '--treshold', type=int, required=False, default=60*10*1, help='Refresh rate')
 | |
|     parser.add_argument('-k', '--autokill', type=int, required=False, default=0, help='Enable auto kill option (1 for TRUE, anything else for FALSE)')
 | |
|     parser.add_argument('-c', '--clear', type=int, required=False, default=0, help='Clear the current module information (Used to clear data from old launched modules)')
 | |
| 
 | |
|     args = parser.parse_args()
 | |
| 
 | |
|     configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
 | |
|     if not os.path.exists(configfile):
 | |
|         raise Exception('Unable to find the configuration file. \
 | |
|                         Did you set environment variables? \
 | |
|                         Or activate the virtualenv.')
 | |
| 
 | |
|     cfg = configparser.ConfigParser()
 | |
|     cfg.read(configfile)
 | |
| 
 | |
|     # REDIS #
 | |
|     server = redis.StrictRedis(
 | |
|         host=cfg.get("Redis_Queues", "host"),
 | |
|         port=cfg.getint("Redis_Queues", "port"),
 | |
|         db=cfg.getint("Redis_Queues", "db"),
 | |
|         decode_responses=True)
 | |
| 
 | |
|     if args.clear == 1:
 | |
|         clearRedisModuleInfo()
 | |
| 
 | |
|     lastTime = datetime.datetime.now()
 | |
| 
 | |
|     module_file_array = set()
 | |
|     no_info_modules = {}
 | |
|     path_allmod = os.path.join(os.environ['AIL_HOME'], 'doc/all_modules.txt')
 | |
|     with open(path_allmod, 'r') as module_file:
 | |
|         for line in module_file:
 | |
|             module_file_array.add(line[:-1])
 | |
| 
 | |
|         cleanRedis()
 | |
| 
 | |
|         while True:
 | |
|             if waiting_refresh():
 | |
| 
 | |
|                 #key = ''
 | |
|                 #while key != 'q':
 | |
|                 #    key = stdsrc.getch()
 | |
|                 #    stdscr.refresh()
 | |
| 
 | |
|                 all_queue = set()
 | |
|                 printarray1 = []
 | |
|                 printarray2 = []
 | |
|                 printarray3 = []
 | |
|                 for queue, card in server.hgetall("queues").items():
 | |
|                     all_queue.add(queue)
 | |
|                     key = "MODULE_" + queue + "_"
 | |
|                     keySet = "MODULE_TYPE_" + queue
 | |
|                     array_module_type = []
 | |
| 
 | |
|                     for moduleNum in server.smembers(keySet):
 | |
|                         value = server.get(key + str(moduleNum))
 | |
|                         if value is not None:
 | |
|                             timestamp, path = value.split(", ")
 | |
|                             if timestamp is not None and path is not None:
 | |
|                                 startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
 | |
|                                 processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
 | |
| 
 | |
|                                 if int(card) > 0:
 | |
|                                     if int((datetime.datetime.now() - startTime_readable).total_seconds()) > args.treshold:
 | |
|                                         log = open(log_filename, 'a')
 | |
|                                         log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n")
 | |
|                                         try:
 | |
|                                             last_kill_try = time.time() - lastTimeKillCommand[moduleNum]
 | |
|                                         except KeyError:
 | |
|                                             last_kill_try = kill_retry_threshold+1
 | |
|                                         if args.autokill == 1 and last_kill_try > kill_retry_threshold :
 | |
|                                             kill_module(queue, int(moduleNum))
 | |
| 
 | |
|                                     array_module_type.append([get_color(processed_time_readable, False) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, False)])
 | |
| 
 | |
|                                 else:
 | |
|                                     printarray2.append([get_color(processed_time_readable, True) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, True)])
 | |
|                             array_module_type.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
 | |
|                     for e in array_module_type:
 | |
|                         printarray1.append(e)
 | |
| 
 | |
|                 for curr_queue in module_file_array:
 | |
|                     if curr_queue not in all_queue:
 | |
|                             printarray3.append([curr_queue, "Not running"])
 | |
|                     else:
 | |
|                         if len(list(server.smembers('MODULE_TYPE_'+curr_queue))) == 0:
 | |
|                             if curr_queue not in no_info_modules:
 | |
|                                 no_info_modules[curr_queue] = int(time.time())
 | |
|                                 printarray3.append([curr_queue, "No data"])
 | |
|                             else:
 | |
|                                 #If no info since long time, try to kill
 | |
|                                 if args.autokill == 1:
 | |
|                                     if int(time.time()) - no_info_modules[curr_queue] > args.treshold:
 | |
|                                         kill_module(curr_queue, None)
 | |
|                                         no_info_modules[curr_queue] = int(time.time())
 | |
|                                     printarray3.append([curr_queue, "Stuck or idle, restarting in " + str(abs(args.treshold - (int(time.time()) - no_info_modules[curr_queue]))) + "s"])
 | |
|                                 else:
 | |
|                                     printarray3.append([curr_queue, "Stuck or idle, restarting disabled"])
 | |
| 
 | |
|                 ## FIXME To add:
 | |
|                 ## Button KILL Process using  Curses
 | |
| 
 | |
|                 printarray1.sort(key=lambda x: x[0][9:], reverse=False)
 | |
|                 printarray2.sort(key=lambda x: x[0][9:], reverse=False)
 | |
|                 printarray1.insert(0,["Queue", "PID", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"])
 | |
|                 printarray2.insert(0,["Queue", "PID","Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"])
 | |
|                 printarray3.insert(0,["Queue", "State"])
 | |
| 
 | |
|                 os.system('clear')
 | |
|                 t1 = AsciiTable(printarray1, title="Working queues")
 | |
|                 t1.column_max_width(1)
 | |
|                 if not t1.ok:
 | |
|                         longest_col = t1.column_widths.index(max(t1.column_widths))
 | |
|                         max_length_col = t1.column_max_width(longest_col)
 | |
|                         if max_length_col > 0:
 | |
|                             for i, content in enumerate(t1.table_data):
 | |
|                                 if len(content[longest_col]) > max_length_col:
 | |
|                                     temp = ''
 | |
|                                     for l in content[longest_col].splitlines():
 | |
|                                         if len(l) > max_length_col:
 | |
|                                             temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
 | |
|                                         else:
 | |
|                                             temp += l + '\n'
 | |
|                                         content[longest_col] = temp.strip()
 | |
|                                 t1.table_data[i] = content
 | |
| 
 | |
|                 t2 = AsciiTable(printarray2, title="Idling queues")
 | |
|                 t2.column_max_width(1)
 | |
|                 if not t2.ok:
 | |
|                         longest_col = t2.column_widths.index(max(t2.column_widths))
 | |
|                         max_length_col = t2.column_max_width(longest_col)
 | |
|                         if max_length_col > 0:
 | |
|                             for i, content in enumerate(t2.table_data):
 | |
|                                 if len(content[longest_col]) > max_length_col:
 | |
|                                     temp = ''
 | |
|                                     for l in content[longest_col].splitlines():
 | |
|                                         if len(l) > max_length_col:
 | |
|                                             temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
 | |
|                                         else:
 | |
|                                             temp += l + '\n'
 | |
|                                         content[longest_col] = temp.strip()
 | |
|                                 t2.table_data[i] = content
 | |
| 
 | |
|                 t3 = AsciiTable(printarray3, title="Not running queues")
 | |
|                 t3.column_max_width(1)
 | |
| 
 | |
|                 printarray4 = []
 | |
|                 for elem in printarrayGlob:
 | |
|                     if elem is not None:
 | |
|                         printarray4.append(elem)
 | |
| 
 | |
|                 t4 = AsciiTable(printarray4, title="Last actions")
 | |
|                 t4.column_max_width(1)
 | |
| 
 | |
|                 legend_array = [["Color", "Meaning"], [Back.RED+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold)+Style.RESET_ALL], [Back.MAGENTA+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold)+" while idle"+Style.RESET_ALL], [Back.YELLOW+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold/2)+Style.RESET_ALL], [Back.GREEN+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time <" +str(args.treshold)]]
 | |
|                 legend = AsciiTable(legend_array, title="Legend")
 | |
|                 legend.column_max_width(1)
 | |
| 
 | |
|                 print(legend.table)
 | |
|                 print('\n')
 | |
|                 print(t1.table)
 | |
|                 print('\n')
 | |
|                 print(t2.table)
 | |
|                 print('\n')
 | |
|                 print(t3.table)
 | |
|                 print('\n')
 | |
|                 print(t4.table9)
 | |
| 
 | |
|                 if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5:
 | |
|                     lastTime = datetime.datetime.now()
 | |
|                     cleanRedis()
 | |
|                 #time.sleep(args.refresh)
 |