AIL-framework/bin/lib/regex_helper.py

103 lines
3.2 KiB
Python
Raw Normal View History

2020-05-20 17:03:58 +02:00
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Regex Helper
"""
import os
import re
import sys
import uuid
from multiprocessing import Process as Proc
sys.path.append(os.environ['AIL_BIN'])
from pubsublogger import publisher
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import ConfigLoader
2020-05-20 17:16:02 +02:00
import Statistics
2020-05-20 17:03:58 +02:00
## LOAD CONFIG ##
# Redis connection ("Redis_Cache") shared by the helpers in this module:
# worker processes write their matches to it and the parent reads them back.
config_loader = ConfigLoader.ConfigLoader()
r_serv_cache = config_loader.get_redis_conn("Redis_Cache")
# Drop the loader reference once the connection has been obtained.
config_loader = None
## -- ##
# Publisher settings used when the helpers report a timeout via publisher.info().
publisher.port = 6380
publisher.channel = "Script"
def generate_redis_cache_key(module_name):
    """Return a unique cache key of the form ``<module_name>_extracted:<uuid4>``."""
    unique_suffix = uuid.uuid4()
    return f'{module_name}_extracted:{unique_suffix}'
def _regex_findall(redis_key, regex, item_content, r_set):
    """Run ``re.findall`` and stash the matches in the Redis cache.

    Used as the target of a worker process, so the matches are handed back
    through ``r_serv_cache`` instead of a return value.

    :param redis_key: cache key under which the matches are stored
    :param regex: pattern passed to ``re.findall``
    :param item_content: text to scan
    :param r_set: truthy -> store in a Redis set (``sadd``);
                  falsy -> store in a Redis list (``lpush``)
    """
    matches = re.findall(regex, item_content)
    if not matches:
        # Nothing found: the key is never created.
        return

    store = r_serv_cache.sadd if r_set else r_serv_cache.lpush
    store(redis_key, *matches)
    # The key is given a 360 second TTL.
    r_serv_cache.expire(redis_key, 360)
2020-05-20 17:29:51 +02:00
def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):
    """Run ``re.findall`` in a separate process, giving up after *max_time* seconds.

    The worker (``_regex_findall``) writes its matches to ``redis_key`` in
    ``r_serv_cache``; this function reads them back and deletes the key.

    :param module_name: name used in the timeout statistic and log message
    :param redis_key: cache key used to pass results between the processes
    :param regex: pattern handed to the worker
    :param item_id: identifier reported in the timeout message
    :param item_content: text to scan
    :param max_time: seconds to wait for the worker before terminating it
    :param r_set: truthy -> read results with ``smembers``;
                  falsy -> read results with ``lrange``
    :return: the cached matches, or ``[]`` when the worker timed out
    """
    worker = Proc(target=_regex_findall, args=(redis_key, regex, item_content, r_set))
    try:
        worker.start()
        worker.join(max_time)

        if not worker.is_alive():
            # Worker finished in time: collect its results and clean up the key.
            found = (
                r_serv_cache.smembers(redis_key)
                if r_set
                else r_serv_cache.lrange(redis_key, 0, -1)
            )
            r_serv_cache.delete(redis_key)
            worker.terminate()
            return found

        # Still running after max_time: terminate it and report the timeout.
        worker.terminate()
        Statistics.incr_module_timeout_statistic(module_name)
        timeout_msg = "{}: processing timeout: {}".format(module_name, item_id)
        print(timeout_msg)
        publisher.info(timeout_msg)
        return []
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        worker.terminate()
        sys.exit(0)
2020-06-24 15:07:45 +02:00
def _regex_search(redis_key, regex, item_content):
    """Search *item_content* for *regex* and cache the first match in Redis.

    Used as the target of a worker process, so the result is handed back
    through ``r_serv_cache`` instead of a return value.

    :param redis_key: cache key under which the first match is stored
    :param regex: pattern string or compiled pattern (``re.search`` accepts both)
    :param item_content: text to scan
    """
    # re.search() instead of regex.search(): works for plain pattern strings
    # as well as compiled patterns, matching how _regex_findall treats `regex`.
    first_occ = re.search(regex, item_content)
    if first_occ is None:
        # No match: the key is never created.
        return

    # Store the matched text; a Match object is not a value Redis can hold.
    r_serv_cache.set(redis_key, first_occ.group(0))
    # Same 360 second TTL as _regex_findall, so the key cannot linger if the
    # parent never reads it back.
    r_serv_cache.expire(redis_key, 360)
2020-06-24 15:07:45 +02:00
def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
    """Run a regex search in a separate process, giving up after *max_time* seconds.

    The worker (``_regex_search``) writes its result to ``redis_key`` in
    ``r_serv_cache``; this function reads it back and deletes the key.

    :param module_name: name used in the timeout statistic and log message
    :param redis_key: cache key used to pass the result between the processes
    :param regex: pattern handed to the worker
    :param item_id: identifier reported in the timeout message
    :param item_content: text to scan
    :param max_time: seconds to wait for the worker before terminating it
    :return: the value read from ``redis_key``, or ``None`` when the worker
             timed out
    """
    worker = Proc(target=_regex_search, args=(redis_key, regex, item_content))
    try:
        worker.start()
        worker.join(max_time)

        if not worker.is_alive():
            # Worker finished in time: fetch its result and clean up the key.
            cached_match = r_serv_cache.get(redis_key)
            r_serv_cache.delete(redis_key)
            worker.terminate()
            return cached_match

        # Still running after max_time: terminate it and report the timeout.
        worker.terminate()
        Statistics.incr_module_timeout_statistic(module_name)
        timeout_msg = "{}: processing timeout: {}".format(module_name, item_id)
        print(timeout_msg)
        publisher.info(timeout_msg)
        return None
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        worker.terminate()
        sys.exit(0)