2020-05-20 17:03:58 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
|
|
|
|
"""
|
|
|
|
Regex Helper
|
|
|
|
"""
|
|
|
|
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
import uuid
|
|
|
|
|
|
|
|
from multiprocessing import Process as Proc
|
|
|
|
|
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
|
|
from pubsublogger import publisher
|
|
|
|
|
2022-11-28 15:01:40 +01:00
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
|
|
|
from lib import ConfigLoader
|
|
|
|
from lib import Statistics
|
2020-05-20 17:03:58 +02:00
|
|
|
|
|
|
|
## LOAD CONFIG ##
config_loader = ConfigLoader.ConfigLoader()
# Module-level cache connection: the worker functions write their matches
# through it and the public wrappers read the results back from it.
r_serv_cache = config_loader.get_redis_conn("Redis_Cache")
# The loader is only needed to obtain the connection; drop the reference.
config_loader = None
## -- ##

# Publisher settings used by the timeout messages sent with publisher.info().
publisher.port = 6380
publisher.channel = "Script"
|
|
|
|
|
|
|
|
def generate_redis_cache_key(module_name):
    """Return a fresh, unique cache key for the given module.

    The key has the form ``<module_name>_extracted:<uuid4>``; a new random
    UUID is drawn on every call, so two calls never share a key.
    """
    unique_suffix = uuid.uuid4()
    return f'{module_name}_extracted:{unique_suffix}'
|
2020-05-20 17:03:58 +02:00
|
|
|
|
|
|
|
def _regex_findall(redis_key, regex, item_content, r_set):
    """Worker: run ``re.findall`` and store every match under ``redis_key``.

    Meant to be executed in a child process, so the result is handed back
    through the Redis cache instead of a return value.

    :param redis_key: cache key receiving the matches
    :param regex: pattern (string or compiled) passed to ``re.findall``
    :param item_content: text to scan
    :param r_set: when true the matches are stored in a Redis set (SADD),
                  otherwise they are pushed onto a Redis list (LPUSH)

    Each match is stored as ``str(match)``. Nothing is written when the
    pattern does not match. The key expires after 360 seconds.
    """
    all_items = re.findall(regex, item_content)
    if not all_items:
        return

    values = [str(item) for item in all_items]

    # Send the write and the TTL together, in a single round trip, so a worker
    # terminated on timeout cannot leave behind a half-filled key that never
    # expires.
    # NOTE(review): assumes r_serv_cache is a redis-py client (pipeline()).
    pipe = r_serv_cache.pipeline()
    if r_set:
        pipe.sadd(redis_key, *values)
    else:
        # Variadic LPUSH inserts left to right, i.e. the same final order as
        # pushing the values one by one.
        pipe.lpush(redis_key, *values)
    pipe.expire(redis_key, 360)
    pipe.execute()
|
|
|
|
|
2020-05-20 17:29:51 +02:00
|
|
|
def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):
    """Run ``re.findall`` in a child process, bounded by ``max_time`` seconds.

    :param module_name: name of the calling module (timeout statistic and log message)
    :param redis_key: cache key used to pass the matches back from the worker
    :param regex: pattern handed to the worker
    :param item_id: identifier of the processed item (log message only)
    :param item_content: text to scan
    :param max_time: seconds to wait for the worker before terminating it
    :param r_set: read the results from a Redis set if true, from a list otherwise

    :return: the matches read back from the cache, or ``[]`` when the worker
             had to be terminated on timeout

    On ``KeyboardInterrupt`` the worker is terminated and the process exits
    through ``sys.exit(0)``.
    """
    proc = Proc(target=_regex_findall, args=(redis_key, regex, item_content, r_set, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            # Give the worker a moment to really exit, then drop whatever it
            # managed to write so partial results do not linger in the cache.
            proc.join(1)
            r_serv_cache.delete(redis_key)
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = f"{module_name}: processing timeout: {item_id}"
            print(err_mess)
            publisher.info(err_mess)
            return []

        # The worker has exited on its own: collect its results and clean up.
        if r_set:
            all_items = r_serv_cache.smembers(redis_key)
        else:
            all_items = r_serv_cache.lrange(redis_key, 0, -1)
        r_serv_cache.delete(redis_key)
        return all_items
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
|
2020-06-24 15:07:45 +02:00
|
|
|
|
|
|
|
def _regex_search(redis_key, regex, item_content):
    """Worker: search ``item_content`` and flag the first match in the cache.

    Meant to be executed in a child process, so the result is handed back
    through the Redis cache instead of a return value.

    :param redis_key: cache key receiving the result
    :param regex: compiled pattern (any object exposing ``search``) or a
                  pattern string, which is compiled here
    :param item_content: text to scan

    Nothing is written when the pattern does not match. The key expires
    after 360 seconds.
    """
    pattern = re.compile(regex) if isinstance(regex, (str, bytes)) else regex
    first_occ = pattern.search(item_content)
    if first_occ:
        # A Match object is not a value Redis can store (recent redis-py
        # versions refuse it), so store its string form: always non-empty,
        # hence truthy once read back by the caller.
        r_serv_cache.set(redis_key, str(first_occ))
        # Same TTL as the findall worker, so an orphaned key cannot live forever.
        r_serv_cache.expire(redis_key, 360)
|
2020-06-24 15:07:45 +02:00
|
|
|
|
|
|
|
def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
    """Run a regex search in a child process, bounded by ``max_time`` seconds.

    :param module_name: name of the calling module (timeout statistic and log message)
    :param redis_key: cache key used to pass the result back from the worker
    :param regex: pattern handed to the worker
    :param item_id: identifier of the processed item (log message only)
    :param item_content: text to scan
    :param max_time: seconds to wait for the worker before terminating it

    :return: the value the worker stored for the first match, or ``None``
             when nothing matched or the worker was terminated on timeout

    On ``KeyboardInterrupt`` the worker is terminated and the process exits
    through ``sys.exit(0)``.
    """
    proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            # Give the worker a moment to really exit, then drop anything it
            # wrote at the last instant so no stale result stays in the cache.
            proc.join(1)
            r_serv_cache.delete(redis_key)
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = f"{module_name}: processing timeout: {item_id}"
            print(err_mess)
            publisher.info(err_mess)
            return None

        # The worker has exited on its own: collect its result and clean up.
        first_occ = r_serv_cache.get(redis_key)
        r_serv_cache.delete(redis_key)
        return first_occ
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
|