From c0171eba814bbd229c507710184c0a5feee66f94 Mon Sep 17 00:00:00 2001 From: Bastien Schils Date: Mon, 23 Sep 2019 18:22:25 +0200 Subject: [PATCH 1/9] Added: IP matching module --- bin/IPAddress.py | 105 +++++++++++++++++++++++++++++++++ bin/packages/config.cfg.sample | 2 + bin/packages/modules.cfg | 3 + 3 files changed, 110 insertions(+) create mode 100755 bin/IPAddress.py diff --git a/bin/IPAddress.py b/bin/IPAddress.py new file mode 100755 index 00000000..daca1f94 --- /dev/null +++ b/bin/IPAddress.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" +The IP Module +====================== + +This module is consuming the global channel. + +It first performs a regex to find IP addresses and then matches those IPs to +some configured ip ranges. + +The list of IP ranges are expected to be in CIDR format (e.g. 192.168.0.0/16) +and should be defined in the config.cfg file, under the [IP] section + +""" + +import time +import re +from pubsublogger import publisher +from packages import Paste +from Helper import Process + + +# +# Thanks to Syed Sadat Nazrul +# URL: https://medium.com/@sadatnazrul/checking-if-ipv4-address-in-network-python-af61a54d714d + +def ip_to_binary(ip): + octet_list_int = ip.split(".") + octet_list_bin = [format(int(i), '08b') for i in octet_list_int] + binary = ("").join(octet_list_bin) + return binary + +def get_addr_network(address, net_size): + #Convert ip address to 32 bit binary + ip_bin = ip_to_binary(address) + #Extract Network ID from 32 binary + network = ip_bin[0:32-(32-net_size)] + return network + +def ip_in_prefix(ip_address, prefix): + #CIDR based separation of address and network size + [prefix_address, net_size] = prefix.split("/") + #Convert string to int + net_size = int(net_size) + #Get the network ID of both prefix and ip based net size + prefix_network = get_addr_network(prefix_address, net_size) + ip_network = get_addr_network(ip_address, net_size) + return ip_network == prefix_network + +def 
search_ip(message): + paste = Paste.Paste(message) + content = paste.get_p_content() + # regex to find IPs + reg_ip = re.compile(r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', flags=re.MULTILINE) + # list of the regex results in the Paste, may be null + results = reg_ip.findall(content) + matching_ips = [] + + print(results) + + for res in results: + for network in ip_networks: + if ip_in_prefix(res,network): + matching_ips.append(res) + + if len(matching_ips) > 0: + print('{} contains {} IPs'.format(paste.p_name, len(matching_ips))) + publisher.warning('{} contains {} IPs'.format(paste.p_name, len(matching_ips))) + + #Tag message with IP + msg = 'infoleak:automatic-detection="ip";{}'.format(message) + p.populate_set_out(msg, 'Tags') + #Send to duplicate + p.populate_set_out(message, 'Duplicate') + +if __name__ == '__main__': + # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) + # Port of the redis instance used by pubsublogger + publisher.port = 6380 + # Script is the default channel used for the modules. 
+ publisher.channel = 'Script' + + # Section name in bin/packages/modules.cfg + config_section = 'IP' + # Setup the I/O queues + p = Process(config_section) + + ip_networks = p.config.get("IP", "networks").split(",") + + + # Sent to the logging a description of the module + publisher.info("Run IP module") + + # Endless loop getting messages from the input queue + while True: + # Get one message from the input queue + message = p.get_from_set() + if message is None: + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue + + # Do something with the message from the queue + search_ip(message) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index c59e5c6b..ea19eb85 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -266,3 +266,5 @@ default_crawler_closespider_pagecount = 50 default_crawler_user_agent = Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0 splash_url = http://127.0.0.1 splash_port = 8050-8052 +[IP] +networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24 diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 7c8e3138..ec38cebc 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -128,3 +128,6 @@ publish = Redis_Mixer [Crawler] subscribe = Redis_Crawler publish = Redis_Mixer,Redis_Tags +[IP] +subscribe = Redis_Global +publish = Redis_Duplicate,Redis_Tags From 163fa4b95ab98092d86c5132b0120b4b0748f0bb Mon Sep 17 00:00:00 2001 From: WimpyMan Date: Tue, 24 Sep 2019 10:27:56 +0200 Subject: [PATCH 2/9] config.cfg.sample: Improved example for IP module By default, the list of networks to monitor is now empty. The previous value is now given as example. 
--- bin/packages/config.cfg.sample | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index ea19eb85..031927eb 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -266,5 +266,8 @@ default_crawler_closespider_pagecount = 50 default_crawler_user_agent = Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0 splash_url = http://127.0.0.1 splash_port = 8050-8052 + [IP] -networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24 +# list of comma-separated CIDR that you wish to be alerted for. e.g: +#networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24 +networks = From d5c3f28e47361d744cea989a7abb43705007cde4 Mon Sep 17 00:00:00 2001 From: WimpyMan Date: Tue, 24 Sep 2019 10:30:33 +0200 Subject: [PATCH 3/9] modules.cfg: Minor: Added \n to separate sections --- bin/packages/modules.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index ec38cebc..868cd0de 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -128,6 +128,7 @@ publish = Redis_Mixer [Crawler] subscribe = Redis_Crawler publish = Redis_Mixer,Redis_Tags + [IP] subscribe = Redis_Global publish = Redis_Duplicate,Redis_Tags From 4586467962b4e8169e50cc73cc20ba3fe762f460 Mon Sep 17 00:00:00 2001 From: Bastien Schils Date: Wed, 2 Oct 2019 21:33:02 +0200 Subject: [PATCH 4/9] IPAddress.py: use ipaddress module Improved readability, maintainability and use of standard module --- bin/IPAddress.py | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/bin/IPAddress.py b/bin/IPAddress.py index daca1f94..f03ee8b3 100755 --- a/bin/IPAddress.py +++ b/bin/IPAddress.py @@ -19,35 +19,9 @@ import re from pubsublogger import publisher from packages import Paste from Helper import Process +from ipaddress import IPv4Network, IPv4Address -# -# Thanks to Syed Sadat Nazrul -# URL: 
https://medium.com/@sadatnazrul/checking-if-ipv4-address-in-network-python-af61a54d714d - -def ip_to_binary(ip): - octet_list_int = ip.split(".") - octet_list_bin = [format(int(i), '08b') for i in octet_list_int] - binary = ("").join(octet_list_bin) - return binary - -def get_addr_network(address, net_size): - #Convert ip address to 32 bit binary - ip_bin = ip_to_binary(address) - #Extract Network ID from 32 binary - network = ip_bin[0:32-(32-net_size)] - return network - -def ip_in_prefix(ip_address, prefix): - #CIDR based separation of address and network size - [prefix_address, net_size] = prefix.split("/") - #Convert string to int - net_size = int(net_size) - #Get the network ID of both prefix and ip based net size - prefix_network = get_addr_network(prefix_address, net_size) - ip_network = get_addr_network(ip_address, net_size) - return ip_network == prefix_network - def search_ip(message): paste = Paste.Paste(message) content = paste.get_p_content() @@ -57,12 +31,11 @@ def search_ip(message): results = reg_ip.findall(content) matching_ips = [] - print(results) - for res in results: + address = IPv4Address(res) for network in ip_networks: - if ip_in_prefix(res,network): - matching_ips.append(res) + if address in network: + matching_ips.append(address) if len(matching_ips) > 0: print('{} contains {} IPs'.format(paste.p_name, len(matching_ips))) @@ -86,7 +59,9 @@ if __name__ == '__main__': # Setup the I/O queues p = Process(config_section) - ip_networks = p.config.get("IP", "networks").split(",") + ip_networks = [] + for network in p.config.get("IP", "networks").split(","): + ip_networks.append(IPv4Network(network)) # Sent to the logging a description of the module @@ -103,3 +78,4 @@ if __name__ == '__main__': # Do something with the message from the queue search_ip(message) + From de494aa460776ec088174668df4d7e7a3a061eeb Mon Sep 17 00:00:00 2001 From: Bastien Schils Date: Wed, 2 Oct 2019 21:35:30 +0200 Subject: [PATCH 5/9] LAUNCH.py: Added execution of script 
IPAddress.py --- bin/LAUNCH.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index d87ef21e..5e31b81d 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -213,6 +213,8 @@ function launching_scripts { screen -S "Script_AIL" -X screen -t "UpdateBackground" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./update-background.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./submit_paste.py; read x" + sleep 0.1 + screen -S "Script_AIL" -X screen -t "IPAddress" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./IPAddress.py; read x" } From 614cdcb47cb35311c9bb34a03d16f93cbfc72abe Mon Sep 17 00:00:00 2001 From: krial057 Date: Fri, 4 Oct 2019 13:36:44 +0200 Subject: [PATCH 6/9] Fixed some typos Fixed some typos in the readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 753c15c7..f8cb8f62 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,8 @@ Features * Multiple feed support * Each module can process and reprocess the information already processed by AIL * Detecting and extracting URLs including their geographical location (e.g. IP address location) -* Extracting and validating potential leak of credit cards numbers, credentials, ... -* Extracting and validating email addresses leaked including DNS MX validation +* Extracting and validating potential leaks of credit card numbers, credentials, ... +* Extracting and validating leaked email addresses, including DNS MX validation * Module for extracting Tor .onion addresses (to be further processed for analysis) * Keep tracks of duplicates (and diffing between each duplicate found) * Extracting and validating potential hostnames (e.g. 
to feed Passive DNS systems) From da581878c3899c7ca3effea52ee11da83ddd1199 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 28 Oct 2019 14:37:50 +0100 Subject: [PATCH 7/9] chg: [Keys module] detect public key --- bin/Keys.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bin/Keys.py b/bin/Keys.py index eb06601a..237f807c 100755 --- a/bin/Keys.py +++ b/bin/Keys.py @@ -121,6 +121,13 @@ def search_key(paste): p.populate_set_out(msg, 'Tags') find = True + if '-----BEGIN PUBLIC KEY-----' in content: + publisher.warning('{} has a public key message'.format(paste.p_name)) + + msg = 'infoleak:automatic-detection="public-key";{}'.format(message) + p.populate_set_out(msg, 'Tags') + find = True + # pgp content if get_pgp_content: p.populate_set_out(message, 'PgpDump') From 143a41124d1cf861b7053ea932ffb449f4b97d54 Mon Sep 17 00:00:00 2001 From: Hendrik Date: Tue, 29 Oct 2019 00:38:33 +0100 Subject: [PATCH 8/9] Fix pybgpranking dependency in requirements Relates #334 --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index dbebad0f..70eab9a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -50,6 +50,7 @@ flask-login bcrypt #DomainClassifier +git+https://github.com/D4-project/BGP-Ranking.git/#egg=pybgpranking&subdirectory=client DomainClassifier #Indexer requirements whoosh From 1ca1f7975ca38a88e7e3ba201eee7daccdb98f1a Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 29 Oct 2019 09:19:26 +0100 Subject: [PATCH 9/9] chg: [Cryptocurrency + Tools] launch by default + remove old Bitcoin module --- bin/Bitcoin.py | 142 ------------------------------------------------- bin/LAUNCH.sh | 4 +- 2 files changed, 3 insertions(+), 143 deletions(-) delete mode 100755 bin/Bitcoin.py diff --git a/bin/Bitcoin.py b/bin/Bitcoin.py deleted file mode 100755 index a3cfcfc7..00000000 --- a/bin/Bitcoin.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -The Bitcoin Module 
-============================ - -It trying to extract Bitcoin address and secret key from paste - - ..seealso:: Paste method (get_regex) - -Requirements ------------- - -*Need running Redis instances. (Redis). - -""" - -from packages import Paste -from Helper import Process -from pubsublogger import publisher - -import re -import time -import redis - -from hashlib import sha256 - - -#### thank http://rosettacode.org/wiki/Bitcoin/address_validation#Python for this 2 functions - -def decode_base58(bc, length): - n = 0 - for char in bc: - n = n * 58 + digits58.index(char) - return n.to_bytes(length, 'big') - -def check_bc(bc): - try: - bcbytes = decode_base58(bc, 25) - return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4] - except Exception: - return False -######################################################## - -def search_key(content, message, paste): - bitcoin_address = re.findall(regex_bitcoin_public_address, content) - bitcoin_private_key = re.findall(regex_bitcoin_private_key, content) - date = str(paste._get_p_date()) - validate_address = False - key = False - if(len(bitcoin_address) >0): - #print(message) - for address in bitcoin_address: - if(check_bc(address)): - validate_address = True - print('Bitcoin address found : {}'.format(address)) - if(len(bitcoin_private_key) > 0): - for private_key in bitcoin_private_key: - print('Bitcoin private key found : {}'.format(private_key)) - key = True - # build bitcoin correlation - save_cryptocurrency_data('bitcoin', date, message, address) - - if(validate_address): - p.populate_set_out(message, 'Duplicate') - to_print = 'Bitcoin found: {} address and {} private Keys'.format(len(bitcoin_address), len(bitcoin_private_key)) - print(to_print) - publisher.warning(to_print) - - msg = 'infoleak:automatic-detection="bitcoin-address";{}'.format(message) - p.populate_set_out(msg, 'Tags') - - if(key): - msg = 'infoleak:automatic-detection="bitcoin-private-key";{}'.format(message) - p.populate_set_out(msg, 
'Tags') - - to_print = 'Bitcoin;{};{};{};'.format(paste.p_source, paste.p_date, - paste.p_name) - publisher.warning('{}Detected {} Bitcoin private key;{}'.format( - to_print, len(bitcoin_private_key),paste.p_rel_path)) - -def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address): - # create basic medata - if not serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)): - serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'first_seen', date) - serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date) - else: - last_seen = serv_metadata.hget('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen') - if not last_seen: - serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date) - else: - if int(last_seen) < int(date): - serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date) - - # global set - serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path) - - # daily - serv_metadata.hincrby('cryptocurrency:{}:{}'.format(cryptocurrency_name, date), cryptocurrency_address, 1) - - # all type - serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1) - - # item_metadata - serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address) - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Bitcoin' - - # Setup the I/O queues - p = Process(config_section) - - serv_metadata = redis.StrictRedis( - host=p.config.get("ARDB_Metadata", "host"), - port=p.config.getint("ARDB_Metadata", "port"), - 
db=p.config.getint("ARDB_Metadata", "db"), - decode_responses=True) - - # Sent to the logging a description of the module - publisher.info("Run Keys module ") - - digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' - - regex_bitcoin_public_address = re.compile(r'(?