mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			103 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			103 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python3
 | |
| # -*-coding:UTF-8 -*
 | |
| 
 | |
| """
 | |
| Regex Helper
 | |
| """
 | |
| 
 | |
| import os
 | |
| import re
 | |
| import sys
 | |
| import uuid
 | |
| 
 | |
| from multiprocessing import Process as Proc
 | |
| 
 | |
| sys.path.append(os.environ['AIL_BIN'])
 | |
| from pubsublogger import publisher
 | |
| 
 | |
| sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
 | |
| import ConfigLoader
 | |
| import Statistics
 | |
| 
 | |
## LOAD CONFIG ##
# The loader is only needed long enough to hand out the cache connection,
# so it is used inline and the module-level name is left bound to None.
r_serv_cache = ConfigLoader.ConfigLoader().get_redis_conn("Redis_Cache")
config_loader = None
## -- ##

# Settings applied to the shared pubsublogger publisher used by this module.
publisher.channel = "Script"
publisher.port = 6380
 | |
| 
 | |
def generate_redis_cache_key(module_name):
    """Return a fresh cache key of the form ``<module_name>_extracted:<uuid4>``.

    A new random UUID4 is generated on every call, so two calls with the
    same module name yield different keys.
    """
    unique_suffix = uuid.uuid4()
    # str.format() renders the UUID with its canonical string form.
    return '{}_extracted:{}'.format(module_name, unique_suffix)
 | |
| 
 | |
def _regex_findall(redis_key, regex, item_content, r_set):
    """Run ``re.findall`` and store the matches under ``redis_key``.

    Used as the target of the worker process started by ``regex_findall``.

    :param redis_key: Redis key receiving the matches.
    :param regex: pattern handed to ``re.findall``.
    :param item_content: text to scan.
    :param r_set: when truthy the matches are added to a Redis set
        (``SADD``); otherwise they are pushed onto a Redis list (``LPUSH``).

    Nothing is written when there is no match. When something is written,
    the key is given a 360 second expiry.
    """
    matches = re.findall(regex, item_content)
    if not matches:
        return

    # Both commands accept any number of values, so one call covers the
    # single-match and the multi-match case alike.
    push = r_serv_cache.sadd if r_set else r_serv_cache.lpush
    push(redis_key, *matches)
    r_serv_cache.expire(redis_key, 360)
 | |
| 
 | |
def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):
    """Run ``re.findall`` in a separate process, bounded by a timeout.

    The search itself is done by ``_regex_findall`` in a child process which
    writes its matches to the Redis cache under ``redis_key``; this function
    waits for it and reads the matches back.

    :param module_name: name used in the timeout statistic and log message.
    :param redis_key: Redis key used to pass the matches between processes.
    :param regex: pattern handed to ``re.findall``.
    :param item_id: identifier of the item, used in the timeout message only.
    :param item_content: text to scan.
    :param max_time: seconds to wait for the worker before giving up.
    :param r_set: when truthy the matches are exchanged through a Redis set
        (read with ``SMEMBERS``), otherwise through a Redis list (``LRANGE``).
    :return: the matches read back from Redis, or ``[]`` on timeout.

    On ``KeyboardInterrupt`` the worker is terminated and the interpreter
    exits with status 0.
    """
    proc = Proc(target=_regex_findall, args=(redis_key, regex, item_content, r_set, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            # Reap the worker with a bounded wait, then clear the key: a write
            # that landed just before the kill must not linger, otherwise a
            # later call reusing the same key would read stale matches.
            proc.join(1)
            r_serv_cache.delete(redis_key)
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return []

        # The worker has exited: collect whatever it stored, then clean up.
        if r_set:
            all_items = r_serv_cache.smembers(redis_key)
        else:
            all_items = r_serv_cache.lrange(redis_key, 0, -1)
        r_serv_cache.delete(redis_key)
        return all_items
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
 | |
| 
 | |
def _regex_search(redis_key, regex, item_content):
    """Search ``item_content`` for ``regex`` and store the first match in Redis.

    Used as the target of the worker process started by ``regex_search``.

    :param redis_key: Redis key receiving the matched text.
    :param regex: pattern to search for; a pattern string or a compiled
        pattern object are both accepted.
    :param item_content: text to scan.

    Nothing is written when there is no match. When a match is found, its
    full text (group 0) is stored with a 360 second expiry.
    """
    # re.search() accepts both pattern strings and compiled patterns,
    # whereas calling regex.search() only works for compiled ones.
    first_occ = re.search(regex, item_content)
    if first_occ is not None:
        # Store the matched text: a Match object is not a value Redis can hold.
        r_serv_cache.set(redis_key, first_occ.group(0))
        # Same expiry as the findall worker, so an unread key cannot linger.
        r_serv_cache.expire(redis_key, 360)
 | |
| 
 | |
def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
    """Search for the first occurrence of ``regex``, bounded by a timeout.

    The search itself is done by ``_regex_search`` in a child process which
    writes its result to the Redis cache under ``redis_key``; this function
    waits for it and reads the result back.

    :param module_name: name used in the timeout statistic and log message.
    :param redis_key: Redis key used to pass the result between processes.
    :param regex: pattern forwarded to the worker.
    :param item_id: identifier of the item, used in the timeout message only.
    :param item_content: text to scan.
    :param max_time: seconds to wait for the worker before giving up.
    :return: the value read back from ``redis_key`` (``None`` when the worker
        stored nothing), or ``None`` on timeout.

    On ``KeyboardInterrupt`` the worker is terminated and the interpreter
    exits with status 0.
    """
    proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            # Reap the worker with a bounded wait, then clear the key: a write
            # that landed just before the kill must not linger, otherwise a
            # later call reusing the same key would read a stale result.
            proc.join(1)
            r_serv_cache.delete(redis_key)
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return None

        # The worker has exited: collect whatever it stored, then clean up.
        first_occ = r_serv_cache.get(redis_key)
        r_serv_cache.delete(redis_key)
        return first_occ
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
 |