mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			chg: [Phone module] Filter Invalid Phone numbers + UI Show extracted
							parent
							
								
									7a52aec884
								
							
						
					
					
						commit
						353b290899
					
				|  | @ -249,6 +249,8 @@ function launching_scripts { | |||
|     sleep 0.1 | ||||
|     screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./PgpDump.py; read x" | ||||
|     sleep 0.1 | ||||
|     screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x" | ||||
|     sleep 0.1 | ||||
|     screen -S "Script_AIL" -X screen -t "Telegram" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Telegram.py; read x" | ||||
|     sleep 0.1 | ||||
|     screen -S "Script_AIL" -X screen -t "Tools" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Tools.py; read x" | ||||
|  | @ -290,8 +292,6 @@ function launching_scripts { | |||
|     ################################## | ||||
|     #       DISABLED MODULES         # | ||||
|     ################################## | ||||
|     # screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x" | ||||
|     # sleep 0.1 | ||||
|     # screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x" | ||||
|     # sleep 0.1 | ||||
|     # screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x" | ||||
|  |  | |||
|  | @ -25,6 +25,7 @@ from modules.CreditCards import CreditCards | |||
| from modules.Iban import Iban | ||||
| from modules.Mail import Mail | ||||
| from modules.Onion import Onion | ||||
| from modules.Phone import Phone | ||||
| from modules.Tools import Tools | ||||
| 
 | ||||
| config_loader = ConfigLoader() | ||||
|  | @ -40,6 +41,7 @@ MODULES = { | |||
|     'infoleak:automatic-detection="iban"': Iban(queue=False), | ||||
|     'infoleak:automatic-detection="mail"': Mail(queue=False), | ||||
|     'infoleak:automatic-detection="onion"': Onion(queue=False), | ||||
|     'infoleak:automatic-detection="phone-number"': Phone(queue=False), | ||||
|     # APIkey ??? | ||||
|     # Credentials | ||||
|     # Zerobins | ||||
|  |  | |||
|  | @ -7,14 +7,13 @@ Regex Helper | |||
| 
 | ||||
| import os | ||||
| import logging.config | ||||
| import phonenumbers | ||||
| import re | ||||
| import sys | ||||
| import uuid | ||||
| 
 | ||||
| from multiprocessing import Process as Proc | ||||
| 
 | ||||
| sys.path.append(os.environ['AIL_BIN']) | ||||
| 
 | ||||
| sys.path.append(os.environ['AIL_BIN']) | ||||
| ################################## | ||||
| # Import Project packages | ||||
|  | @ -65,7 +64,6 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time | |||
|             proc.terminate() | ||||
|             # Statistics.incr_module_timeout_statistic(module_name) | ||||
|             err_mess = f"{module_name}: processing timeout: {item_id}" | ||||
|             print(err_mess) | ||||
|             logger.info(err_mess) | ||||
|             return [] | ||||
|         else: | ||||
|  | @ -99,7 +97,6 @@ def regex_finditer(r_key, regex, item_id, content, max_time=30): | |||
|             proc.terminate() | ||||
|             # Statistics.incr_module_timeout_statistic(r_key) | ||||
|             err_mess = f"{r_key}: processing timeout: {item_id}" | ||||
|             print(err_mess) | ||||
|             logger.info(err_mess) | ||||
|             return [] | ||||
|         else: | ||||
|  | @ -130,7 +127,6 @@ def regex_search(r_key, regex, item_id, content, max_time=30): | |||
|             proc.terminate() | ||||
|             # Statistics.incr_module_timeout_statistic(r_key) | ||||
|             err_mess = f"{r_key}: processing timeout: {item_id}" | ||||
|             print(err_mess) | ||||
|             logger.info(err_mess) | ||||
|             return False | ||||
|         else: | ||||
|  | @ -144,3 +140,40 @@ def regex_search(r_key, regex, item_id, content, max_time=30): | |||
|         print("Caught KeyboardInterrupt, terminating regex worker") | ||||
|         proc.terminate() | ||||
|         sys.exit(0) | ||||
| 
 | ||||
| ## Phone Regexes ## | ||||
| def _regex_phone_iter(r_key, country_code, content): | ||||
|     """ | ||||
|     Worker: find phone numbers in content and push each match to the cache list r_key | ||||
|     Each entry is stored as 'start:end:raw_string' | ||||
|     :param r_key: cache key of the result list | ||||
|     :param country_code: region passed to phonenumbers.PhoneNumberMatcher | ||||
|     :param content: text to scan | ||||
|     """ | ||||
|     iterator = phonenumbers.PhoneNumberMatcher(content, country_code) | ||||
|     for match in iterator: | ||||
|         value = match.raw_string | ||||
|         # PhoneNumberFormat.E164 | ||||
|         # value = phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.INTERNATIONAL) | ||||
|         start = match.start | ||||
|         end = match.end | ||||
|         r_serv_cache.rpush(r_key, f'{start}:{end}:{value}') | ||||
|     # The TTL is only set here, once the whole content has been scanned | ||||
|     r_serv_cache.expire(r_key, 360) | ||||
| 
 | ||||
| def regex_phone_iter(r_key, country_code, item_id, content, max_time=30): | ||||
|     proc = Proc(target=_regex_phone_iter, args=(r_key, country_code, content)) | ||||
|     try: | ||||
|         proc.start() | ||||
|         proc.join(max_time) | ||||
|         if proc.is_alive(): | ||||
|             proc.terminate() | ||||
|             # Statistics.incr_module_timeout_statistic(r_key) | ||||
|             err_mess = f"{r_key}: processing timeout: {item_id}" | ||||
|             logger.info(err_mess) | ||||
|             return [] | ||||
|         else: | ||||
|             res = r_serv_cache.lrange(r_key, 0, -1) | ||||
|             r_serv_cache.delete(r_key) | ||||
|             proc.terminate() | ||||
|             all_match = [] | ||||
|             for match in res: | ||||
|                 start, end, value = match.split(':', 2) | ||||
|                 all_match.append((int(start), int(end), value)) | ||||
|             return all_match | ||||
|     except KeyboardInterrupt: | ||||
|         print("Caught KeyboardInterrupt, terminating regex worker") | ||||
|         proc.terminate() | ||||
|         sys.exit(0) | ||||
|  | @ -15,7 +15,6 @@ It apply phone number regexes on item content and warn if above a threshold. | |||
| # Import External packages | ||||
| ################################## | ||||
| import os | ||||
| import re | ||||
| import sys | ||||
| import phonenumbers | ||||
| 
 | ||||
|  | @ -34,44 +33,65 @@ class Phone(AbstractModule): | |||
| 
 | ||||
|     # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required) | ||||
|     # reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})') | ||||
|     REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})') | ||||
|     # REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})') | ||||
| 
 | ||||
|     def __init__(self): | ||||
|         super(Phone, self).__init__() | ||||
|     def __init__(self, queue=True): | ||||
|         super(Phone, self).__init__(queue=queue) | ||||
| 
 | ||||
|         # Waiting time in seconds between two processed messages | ||||
|         self.pending_seconds = 1 | ||||
| 
 | ||||
|     def extract(self, obj_id, content, tag): | ||||
|         extracted = [] | ||||
|         phones = self.regex_phone_iter('US', obj_id, content) | ||||
|         for phone in phones: | ||||
|             extracted.append([phone[0], phone[1], phone[2], f'tag:{tag}']) | ||||
|         return extracted | ||||
| 
 | ||||
|     def compute(self, message): | ||||
|         item = Item(message) | ||||
|         content = item.get_content() | ||||
|         # List of the regex results in the Item, may be null | ||||
|         results = self.REG_PHONE.findall(content) | ||||
| 
 | ||||
|         # If the list is greater than 4, we consider the Item may contain a list of phone numbers | ||||
|         if len(results) > 4: | ||||
|             self.logger.debug(results) | ||||
|             self.redis_logger.warning(f'{item.get_id()} contains PID (phone numbers)') | ||||
|         # TODO use language detection to choose the country code ? | ||||
|         results = self.regex_phone_iter('US', item.id, content) | ||||
|         for phone in results: | ||||
|             print(phone[2]) | ||||
| 
 | ||||
|         if results: | ||||
|             # TAGS | ||||
|             msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}' | ||||
|             self.add_message_to_queue(msg, 'Tags') | ||||
| 
 | ||||
|             stats = {} | ||||
|             for phone_number in results: | ||||
|                 try: | ||||
|                     x = phonenumbers.parse(phone_number, None) | ||||
|                     country_code = x.country_code | ||||
|                     if stats.get(country_code) is None: | ||||
|                         stats[country_code] = 1 | ||||
|                     else: | ||||
|                         stats[country_code] = stats[country_code] + 1 | ||||
|                 except: | ||||
|                     pass | ||||
|             for country_code in stats: | ||||
|                 if stats[country_code] > 4: | ||||
|                     self.redis_logger.warning(f'{item.get_id()} contains Phone numbers with country code {country_code}') | ||||
|             self.redis_logger.warning(f'{item.get_id()} contains {len(phone)} Phone numbers') | ||||
| 
 | ||||
|         # # List of the regex results in the Item, may be null | ||||
|         # results = self.REG_PHONE.findall(content) | ||||
|         # | ||||
|         # # If the list is greater than 4, we consider the Item may contain a list of phone numbers | ||||
|         # if len(results) > 4: | ||||
|         #     self.logger.debug(results) | ||||
|         #     self.redis_logger.warning(f'{item.get_id()} contains PID (phone numbers)') | ||||
|         # | ||||
|         #     msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}' | ||||
|         #     self.add_message_to_queue(msg, 'Tags') | ||||
|         # | ||||
|         #     stats = {} | ||||
|         #     for phone_number in results: | ||||
|         #         try: | ||||
|         #             x = phonenumbers.parse(phone_number, None) | ||||
|         #             country_code = x.country_code | ||||
|         #             if stats.get(country_code) is None: | ||||
|         #                 stats[country_code] = 1 | ||||
|         #             else: | ||||
|         #                 stats[country_code] = stats[country_code] + 1 | ||||
|         #         except: | ||||
|         #             pass | ||||
|         #     for country_code in stats: | ||||
|         #         if stats[country_code] > 4: | ||||
|         #             self.redis_logger.warning(f'{item.get_id()} contains Phone numbers with country code {country_code}') | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     module = Phone() | ||||
|     module.run() | ||||
|     # NOTE(review): run() is commented out and a single hard-coded item id is processed instead; | ||||
|     # this looks like debugging code - confirm before launching the module from the launcher script | ||||
|     # module.run() | ||||
|     module.compute('crawled/2023/02/21/circl.luc90be694-a559-4d77-bfa4-9c54ea8bc2f7') | ||||
|  |  | |||
|  | @ -110,6 +110,17 @@ class AbstractModule(ABC): | |||
|         return regex_helper.regex_findall(self.module_name, self.r_cache_key, regex, obj_id, content, | ||||
|                                           max_time=self.max_execution_time, r_set=r_set) | ||||
| 
 | ||||
|     def regex_phone_iter(self, country_code, obj_id, content): | ||||
|         """ | ||||
|         phone number extraction helper (force timeout) | ||||
|         :param country_code: country code forwarded to regex_helper.regex_phone_iter | ||||
|         :param obj_id: object id | ||||
|         :param content: object content | ||||
|         :return: result of regex_helper.regex_phone_iter, called with self.r_cache_key and self.max_execution_time | ||||
|         """ | ||||
|         return regex_helper.regex_phone_iter(self.r_cache_key, country_code, obj_id, content, | ||||
|                                              max_time=self.max_execution_time) | ||||
| 
 | ||||
|     def run(self): | ||||
|         """ | ||||
|         Run Module endless process | ||||
|  |  | |||
|  | @ -128,10 +128,9 @@ publish = Duplicate,Tags | |||
| subscribe = Cve | ||||
| publish = Tags | ||||
| 
 | ||||
| # Disabled  | ||||
| #[Phone] | ||||
| #subscribe = Item | ||||
| #publish = Tags | ||||
| [Phone] | ||||
| subscribe = Item | ||||
| publish = Tags | ||||
| 
 | ||||
| [Keys] | ||||
| subscribe = Item | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Terrtia
						Terrtia