AIL-framework/bin/ModuleInformation.py

355 lines
16 KiB
Python
Raw Normal View History

2018-05-04 13:53:29 +02:00
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
2016-08-26 10:07:06 +02:00
'''
This module can be used to see information about running modules.
This information is logged in "logs/moduleInfo.log".
It can also try to manage them by killing inactive ones.
However, it does not support multiple occurrences of the same module
(it will kill the first one obtained by get).
'''
import time
import datetime
import redis
import os
import signal
import argparse
from subprocess import PIPE, Popen
2018-04-16 14:50:04 +02:00
import configparser
2016-08-24 15:53:00 +02:00
import json
from terminaltables import AsciiTable
import textwrap
from colorama import Fore, Back, Style, init
import curses
2016-08-24 15:53:00 +02:00
# CONFIG VARIABLES
# Minimum number of seconds that must elapse between two kill attempts on the
# same PID (compared against the timestamps stored in lastTimeKillCommand).
kill_retry_threshold = 60 #1m
2016-08-24 15:53:00 +02:00
# File to which one JSON array per line is appended for every module found
# processing the same item for longer than the threshold.
log_filename = "../logs/moduleInfo.log"
# Shell pipelines used to look up processes. Both templates are identical:
# command_search_pid is formatted with "<module>.py" (see getPid) and
# command_search_name with a PID taken from redis (see cleanRedis).
command_search_pid = "ps a -o pid,cmd | grep {}"
command_search_name = "ps a -o pid,cmd | grep {}"
# Opens a new window in the screen session named "Script" and runs ./<module>.py
# in it; both placeholders receive the module name.
command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
init() # Necessary for colorama
# Rows of the "Last actions" table: a header row followed by 14 placeholder
# slots. Writers insert the newest row at index 1 and pop the last element,
# which keeps the list at a constant length.
printarrayGlob = [None]*14
printarrayGlob.insert(0, ["Time", "Module", "PID", "Action"])
# Maps a PID to the UNIX timestamp of the last kill attempt made on it.
lastTimeKillCommand = {}
#Curses init
#stdscr = curses.initscr()
#curses.cbreak()
#stdscr.keypad(1)
# GLOBAL
# UNIX timestamp of the last display refresh (updated by waiting_refresh).
last_refresh = 0
2016-08-24 17:28:39 +02:00
def getPid(module):
    """Return the PID of the running ``<module>.py`` process, or None.

    Runs ``command_search_pid`` through the shell and inspects every output
    line. The first line whose tokens include a Python interpreter name
    yields its first column (the PID) as an int.

    :param module: module name without the ``.py`` suffix
    :return: PID as int, or None when no matching Python process is listed
    """
    interpreters = ('python', 'python2', 'python3')
    search_cmd = command_search_pid.format(module + ".py")
    # The context manager closes the pipes and reaps the child even when we
    # return from inside the loop.
    with Popen([search_cmd], stdin=PIPE, stdout=PIPE, shell=True) as p:
        for raw_line in p.stdout:
            # The pipe yields bytes under Python 3; decode before comparing
            # against str tokens, otherwise no line can ever match.
            line = raw_line.decode('utf-8', 'replace')
            print(line.rstrip('\n'))
            splittedLine = line.split()
            if any(token in interpreters for token in splittedLine):
                return int(splittedLine[0])
    return None
2016-08-24 17:28:39 +02:00
def clearRedisModuleInfo():
    """Delete every redis key matching ``MODULE_*`` and log the action.

    A "Cleared redis module info" row is pushed at the top of the
    "Last actions" table and the oldest row is dropped.
    """
    for module_key in server.keys("MODULE_*"):
        server.delete(module_key)

    cleared_at = datetime.datetime.fromtimestamp(int(time.time()))
    action_row = [cleared_at, "*", "-", "Cleared redis module info"]
    # Index 0 holds the table header, so new rows go right below it.
    printarrayGlob.insert(1, action_row)
    del printarrayGlob[-1]
2016-08-24 17:28:39 +02:00
def cleanRedis():
    """Remove from redis the PIDs that no longer belong to a running module.

    For every ``MODULE_TYPE_*`` set, each stored PID is searched for with
    ``command_search_name``. The PID is kept only if some output line
    contains both a Python interpreter name and ``./<module>.py``. Otherwise
    it is removed from the set and a row is added to the "Last actions"
    table.
    """
    interpreters = ('python', 'python2', 'python3')
    for k in server.keys("MODULE_TYPE_*"):
        # "MODULE_TYPE_" is 12 characters long; the module name is whatever
        # follows it, up to the next underscore.
        moduleName = k[12:].split('_')[0]
        script = "./" + moduleName + ".py"
        for pid in server.smembers(k):
            flag_pid_valid = False
            search_cmd = command_search_name.format(pid)
            # The context manager closes the pipes and reaps the child.
            with Popen([search_cmd], stdin=PIPE, stdout=PIPE, shell=True) as proc:
                for raw_line in proc.stdout:
                    # Decode first: the pipe yields bytes, and comparing bytes
                    # tokens with str would flag every PID as invalid.
                    splittedLine = raw_line.decode('utf-8', 'replace').split()
                    if script in splittedLine and any(
                            token in interpreters for token in splittedLine):
                        flag_pid_valid = True
                        break

            if not flag_pid_valid:
                print(flag_pid_valid, 'cleaning', pid, 'in', k)
                server.srem(k, pid)
                inst_time = datetime.datetime.fromtimestamp(int(time.time()))
                printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k])
                printarrayGlob.pop()
def _log_action(module, pid, action):
    """Push one row on top of the "Last actions" table and drop the oldest."""
    inst_time = datetime.datetime.fromtimestamp(int(time.time()))
    printarrayGlob.insert(1, [inst_time, module, pid, action])
    printarrayGlob.pop()


def _restart_module(module, pid):
    """Relaunch *module* through ``command_restart_module`` and log it."""
    print(module, 'has been killed')
    print('restarting', module, '...')
    Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
    # Logged oldest first so that "Restarted" ends up above "Killed".
    _log_action(module, pid, "Killed")
    _log_action(module, "?", "Restarted")


def kill_module(module, pid):
    """Send SIGUSR1 to a module, restart it once it is gone, and log each step.

    :param module: module name without the ``.py`` suffix
    :param pid: PID to signal, or None to look it up with ``getPid``. A given
        PID is only acted upon when its ``MODULE_<module>_<pid>`` key exists
        in redis.

    The signal is sent at most twice, one second apart. The module is
    restarted as soon as ``getPid`` no longer finds it. ``cleanRedis`` runs
    afterwards, except on the early returns (PID unknown to redis, or the
    first signal failing with OSError).
    """
    print('')
    print('-> trying to kill module:', module)

    if pid is None:
        print('pid was None')
        _log_action(module, pid, "PID was None")
        pid = getPid(module)
    elif server.exists("MODULE_" + module + "_" + str(pid)) == 0:
        # Verify that the pid is at least in redis
        return

    lastTimeKillCommand[pid] = int(time.time())

    if pid is None:
        print('Module does not exist')
        _log_action(module, pid, "Killing failed, module not found")
    else:
        try:
            os.kill(pid, signal.SIGUSR1)
        except OSError:
            print(pid, 'already killed')
            _log_action(module, pid, "Already killed")
            return

        time.sleep(1)
        if getPid(module) is None:
            _restart_module(module, pid)
        else:
            print('killing failed, retrying...')
            _log_action(module, pid, "Killing #1 failed.")
            time.sleep(1)
            try:
                os.kill(pid, signal.SIGUSR1)
            except OSError:
                # The process may have exited since the previous check; the
                # getPid call below decides which outcome gets reported.
                print(pid, 'could not be signalled on retry')
            time.sleep(1)
            if getPid(module) is None:
                _restart_module(module, pid)
            else:
                print('killing failed!')
                _log_action(module, pid, "Killing failed!")

    cleanRedis()
2016-08-24 15:53:00 +02:00
def get_color(time, idle):
    """Return the colorama style prefix matching a processing/idle duration.

    :param time: duration formatted like ``str(datetime.timedelta)`` without
        the fractional part (``"H:MM:SS"`` or ``"N day(s), H:MM:SS"``), or
        None to obtain the reset sequence. Note that this parameter shadows
        the ``time`` module inside the function.
    :param idle: True when the duration is an idle time; only changes the
        colour used once the threshold is reached (magenta instead of red).
    :return: red/magenta when the duration >= ``args.treshold``, yellow when
        it is > ``args.treshold / 2``, green otherwise; ``Style.RESET_ALL``
        when *time* is None.
    """
    if time is None:
        return Style.RESET_ALL

    # str(timedelta) prepends "N day, " / "N days, " from 24 hours upwards;
    # split that part off so the int() conversions below do not fail on it.
    days = 0
    clock = time
    if ',' in clock:
        day_part, clock = clock.split(',', 1)
        days = int(day_part.split()[0])

    temp = clock.strip().split(':')
    elapsed = days*86400 + int(temp[0])*3600 + int(temp[1])*60 + int(temp[2])

    if elapsed >= args.treshold:
        if idle:
            return Back.MAGENTA + Style.BRIGHT
        return Back.RED + Style.BRIGHT
    if elapsed > args.treshold/2:
        return Back.YELLOW + Style.BRIGHT
    return Back.GREEN + Style.BRIGHT
def waiting_refresh():
    """Tell whether the display is due for a refresh.

    Returns False while fewer than ``args.refresh`` seconds have passed since
    the last refresh. Otherwise records the current time in ``last_refresh``
    and returns True.
    """
    global last_refresh
    refresh_due = not (time.time() - last_refresh < args.refresh)
    if refresh_due:
        last_refresh = time.time()
    return refresh_due
2018-04-16 14:50:04 +02:00
2016-08-24 15:53:00 +02:00
def _wrap_longest_column(table):
    """Wrap the widest column of *table* in place when the table is too wide.

    Nothing is changed when ``table.ok`` is true or when no positive width is
    available for the widest column.
    """
    if table.ok:
        return
    longest_col = table.column_widths.index(max(table.column_widths))
    max_length_col = table.column_max_width(longest_col)
    if max_length_col <= 0:
        return
    for i, content in enumerate(table.table_data):
        if len(content[longest_col]) > max_length_col:
            pieces = []
            for l in content[longest_col].splitlines():
                if len(l) > max_length_col:
                    pieces.append('\n'.join(textwrap.wrap(l, max_length_col)))
                else:
                    pieces.append(l)
            content[longest_col] = '\n'.join(pieces).strip()
            table.table_data[i] = content


# Script entry point: parse the options, connect to redis, then loop forever
# redrawing the status tables (working queues, idle queues, queues without
# data, last actions) and, when --autokill is 1, restarting stuck modules.
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one.')
    parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate')
    parser.add_argument('-t', '--treshold', type=int, required=False, default=60*10*1, help='Processing time in seconds after which a module is considered stuck')
    parser.add_argument('-k', '--autokill', type=int, required=False, default=0, help='Enable auto kill option (1 for TRUE, anything else for FALSE)')
    parser.add_argument('-c', '--clear', type=int, required=False, default=0, help='Clear the current module information (Used to clear data from old launched modules)')

    # `args` and `server` are module-level names read by the functions of
    # this file.
    args = parser.parse_args()

    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. '
                        'Did you set environment variables? '
                        'Or activate the virtualenv.')

    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    server = redis.StrictRedis(
        host=cfg.get("Redis_Queues", "host"),
        port=cfg.getint("Redis_Queues", "port"),
        db=cfg.getint("Redis_Queues", "db"),
        decode_responses=True)

    if args.clear == 1:
        clearRedisModuleInfo()

    lastTime = datetime.datetime.now()

    module_file_array = set()
    no_info_modules = {}
    path_allmod = os.path.join(os.environ['AIL_HOME'], 'doc/all_modules.txt')
    with open(path_allmod, 'r') as module_file:
        for line in module_file:
            # Strip only the line terminator, so that a last line without a
            # trailing newline keeps its final character.
            module_file_array.add(line.rstrip('\n'))

    cleanRedis()

    while True:
        if not waiting_refresh():
            # Yield the CPU instead of spinning until the next refresh is due.
            time.sleep(0.05)
            continue

        all_queue = set()
        printarray1 = []
        printarray2 = []
        printarray3 = []

        for queue, card in server.hgetall("queues").items():
            all_queue.add(queue)
            key = "MODULE_" + queue + "_"
            keySet = "MODULE_TYPE_" + queue
            array_module_type = []

            for moduleNum in server.smembers(keySet):
                value = server.get(key + str(moduleNum))
                if value is None:
                    continue
                # Split once only, so a path containing ", " stays intact.
                timestamp, path = value.split(", ", 1)
                startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
                processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]

                if int(card) > 0:
                    if int((datetime.datetime.now() - startTime_readable).total_seconds()) > args.treshold:
                        with open(log_filename, 'a') as log:
                            log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n")
                        # kill_module stores its timestamps under the int
                        # PID, so the lookup has to use the int as well.
                        module_pid = int(moduleNum)
                        try:
                            last_kill_try = time.time() - lastTimeKillCommand[module_pid]
                        except KeyError:
                            last_kill_try = kill_retry_threshold+1
                        if args.autokill == 1 and last_kill_try > kill_retry_threshold:
                            kill_module(queue, module_pid)

                    array_module_type.append([get_color(processed_time_readable, False) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, False)])

                else:
                    printarray2.append([get_color(processed_time_readable, True) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, True)])

            # Longest processing time first (key function: Python 3 has no cmp).
            array_module_type.sort(key=lambda x: x[4], reverse=True)
            printarray1.extend(array_module_type)

        for curr_queue in module_file_array:
            if curr_queue not in all_queue:
                printarray3.append([curr_queue, "Not running"])
            elif not server.smembers('MODULE_TYPE_'+curr_queue):
                if curr_queue not in no_info_modules:
                    no_info_modules[curr_queue] = int(time.time())
                    printarray3.append([curr_queue, "No data"])
                elif args.autokill == 1:
                    # If no info since long time, try to kill
                    if int(time.time()) - no_info_modules[curr_queue] > args.treshold:
                        kill_module(curr_queue, None)
                        no_info_modules[curr_queue] = int(time.time())
                    printarray3.append([curr_queue, "Stuck or idle, restarting in " + str(abs(args.treshold - (int(time.time()) - no_info_modules[curr_queue]))) + "s"])
                else:
                    printarray3.append([curr_queue, "Stuck or idle, restarting disabled"])

        ## FIXME To add:
        ## Button KILL Process using Curses

        # x[0] starts with a 9-character colour prefix; sort on the queue name.
        printarray1.sort(key=lambda x: x[0][9:], reverse=False)
        printarray2.sort(key=lambda x: x[0][9:], reverse=False)
        printarray1.insert(0,["Queue", "PID", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"])
        printarray2.insert(0,["Queue", "PID","Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"])
        printarray3.insert(0,["Queue", "State"])

        os.system('clear')
        t1 = AsciiTable(printarray1, title="Working queues")
        t1.column_max_width(1)
        _wrap_longest_column(t1)

        t2 = AsciiTable(printarray2, title="Idling queues")
        t2.column_max_width(1)
        _wrap_longest_column(t2)

        t3 = AsciiTable(printarray3, title="Not running queues")
        t3.column_max_width(1)

        printarray4 = [elem for elem in printarrayGlob if elem is not None]

        t4 = AsciiTable(printarray4, title="Last actions")
        t4.column_max_width(1)

        legend_array = [
            ["Color", "Meaning"],
            [Back.RED+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold)+Style.RESET_ALL],
            [Back.MAGENTA+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold)+" while idle"+Style.RESET_ALL],
            [Back.YELLOW+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold/2)+Style.RESET_ALL],
            [Back.GREEN+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time <" +str(args.treshold)],
        ]
        legend = AsciiTable(legend_array, title="Legend")
        legend.column_max_width(1)

        print(legend.table)
        print('\n')
        print(t1.table)
        print('\n')
        print(t2.table)
        print('\n')
        print(t3.table)
        print('\n')
        print(t4.table)

        # Purge stale PIDs from redis once more than five refresh periods
        # have passed since the previous purge.
        if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5:
            lastTime = datetime.datetime.now()
            cleanRedis()