AIL-framework/bin/lib/regex_helper.py

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Regex Helper
"""
import os
import re
import sys
import uuid
from multiprocessing import Process as Proc
sys.path.append(os.environ['AIL_BIN'])
from pubsublogger import publisher
##################################
# Import Project packages
##################################
from lib import ConfigLoader
from lib import Statistics

## LOAD CONFIG ##
config_loader = ConfigLoader.ConfigLoader()
r_serv_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None
## -- ##
publisher.port = 6380
publisher.channel = "Script"


def generate_redis_cache_key(module_name):
    """Generate a unique Redis key used to pass regex results between processes."""
    new_uuid = str(uuid.uuid4())
    return f'{module_name}_extracted:{new_uuid}'


def _regex_findall(redis_key, regex, item_content, r_set):
    # Child process: run the regex and push every match to Redis so the
    # parent can retrieve the results even after this process exits.
    all_items = re.findall(regex, item_content)
    if all_items:
        for item in all_items:
            if r_set:
                r_serv_cache.sadd(redis_key, str(item))
            else:
                r_serv_cache.lpush(redis_key, str(item))
        # temporary key: expire after 6 minutes in case the parent never reads it
        r_serv_cache.expire(redis_key, 360)


def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):
    """Run re.findall() on item_content in a child process, killed after max_time seconds.

    Matches are passed back through the temporary redis_key (a set if r_set
    is True, a list otherwise). Returns the matches, or [] on timeout.
    """
    proc = Proc(target=_regex_findall, args=(redis_key, regex, item_content, r_set, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            # the regex is still running after max_time seconds: kill the child
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = f"{module_name}: processing timeout: {item_id}"
            print(err_mess)
            publisher.info(err_mess)
            return []
        else:
            if r_set:
                all_items = r_serv_cache.smembers(redis_key)
            else:
                all_items = r_serv_cache.lrange(redis_key, 0, -1)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return all_items
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
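
# Why a child process? A regex evaluating in the current process cannot be
# interrupted from Python, so a pathological pattern (e.g. r'(a+)+$' matched
# against 'a' * 40 + '!') would hang the module through catastrophic
# backtracking. Running the search in a separate process lets the caller
# terminate() it once max_time is exceeded.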


def _regex_search(redis_key, regex, item_content):
    # Child process: 'regex' must be a compiled pattern here (regex.search()).
    first_occ = regex.search(item_content)
    if first_occ:
        # store the matched string: a re.Match object cannot be written to Redis
        r_serv_cache.set(redis_key, first_occ.group())


def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
    """Run regex.search() on item_content in a child process, killed after max_time seconds.

    Returns the first match stored under redis_key, or None on timeout or no match.
    """
    proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = f"{module_name}: processing timeout: {item_id}"
            print(err_mess)
            publisher.info(err_mess)
            return None
        else:
            first_occ = r_serv_cache.get(redis_key)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return first_occ
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
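

# A minimal usage sketch (an assumption, not part of the upstream file): it
# needs AIL_BIN set and the Redis cache reachable. The module name 'Demo',
# the item id and the sample content below are made-up illustrations.
if __name__ == '__main__':
    sample = 'contact: admin@example.com, sales@example.org'

    # findall: the pattern may be a plain string (it is passed to re.findall)
    key = generate_redis_cache_key('Demo')
    print(regex_findall('Demo', key, r'[\w.+-]+@[\w-]+\.[\w.-]+',
                        'demo_item_id', sample, max_time=5))

    # search: the pattern must be compiled (regex.search is called on it)
    key = generate_redis_cache_key('Demo')
    print(regex_search('Demo', key, re.compile(r'[\w.+-]+@[\w-]+\.[\w.-]+'),
                       'demo_item_id', sample, max_time=5))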