mirror of https://github.com/CIRCL/AIL-framework
parent
0389b9c23b
commit
6480744641
50
OVERVIEW.md
50
OVERVIEW.md
|
@ -152,6 +152,56 @@ Redis and ARDB overview
|
|||
| binary_hash:**hash** | **item** | **nb_seen** |
|
||||
| hexadecimal_hash:**hash** | **item** | **nb_seen** |
|
||||
|
||||
#### PgpDump
|
||||
|
||||
##### Hset:
|
||||
| Key | Field | Value |
|
||||
| ------ | ------ | ------ |
|
||||
| pgp_metadata_key:*key id* | first_seen | **date** |
|
||||
| | last_seen | **date** |
|
||||
| | |
|
||||
| pgp_metadata_name:*name* | first_seen | **date** |
|
||||
| | last_seen | **date** |
|
||||
| | |
|
||||
| pgp_metadata_mail:*mail* | first_seen | **date** |
|
||||
| | last_seen | **date** |
|
||||
|
||||
##### set:
|
||||
| Key | Value |
|
||||
| ------ | ------ |
|
||||
| pgp_key:*key id* | *item_path* |
|
||||
| | |
|
||||
| pgp_name:*name* | *item_path* |
|
||||
| | |
|
||||
| pgp_mail:*mail* | *item_path* |
|
||||
|
||||
##### Hset date:
|
||||
| Key | Field | Value |
|
||||
| ------ | ------ | ------ |
|
||||
| pgp:key:*date* | *key* | *nb seen* |
|
||||
| | |
|
||||
| pgp:name:*date* | *name* | *nb seen* |
|
||||
| | |
|
||||
| pgp:mail:*date* | *mail* | *nb seen* |
|
||||
|
||||
##### zset:
|
||||
| Key | Field | Value |
|
||||
| ------ | ------ | ------ |
|
||||
| pgp_all:key | *key* | *nb seen* |
|
||||
| | |
|
||||
| pgp_all:name | *name* | *nb seen* |
|
||||
| | |
|
||||
| pgp_all:mail | *mail* | *nb seen* |
|
||||
|
||||
##### set:
|
||||
| Key | Value |
|
||||
| ------ | ------ |
|
||||
| item_pgp_key:*item_path* | *key* |
|
||||
| | |
|
||||
| item_pgp_name:*item_path* | *name* |
|
||||
| | |
|
||||
| item_pgp_mail:*item_path* | *mail* |
|
||||
|
||||
## DB9 - Crawler:
|
||||
|
||||
##### Hset:
|
||||
|
|
|
@ -108,6 +108,15 @@ def search_key(paste):
|
|||
p.populate_set_out(msg, 'Tags')
|
||||
find = True
|
||||
|
||||
if '-----BEGIN PGP PUBLIC KEY BLOCK-----' in content:
|
||||
p.populate_set_out(message, 'PgpDump')
|
||||
|
||||
if '-----BEGIN PGP SIGNATURE-----' in content:
|
||||
p.populate_set_out(message, 'PgpDump')
|
||||
|
||||
if '-----BEGIN PGP MESSAGE-----' in content:
|
||||
p.populate_set_out(message, 'PgpDump')
|
||||
|
||||
if find :
|
||||
|
||||
#Send to duplicate
|
||||
|
|
|
@ -0,0 +1,188 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
PgpDum module
|
||||
|
||||
Extract ID from PGP Blocks
|
||||
"""
|
||||
|
||||
import datetime
import os
import re
import signal
import subprocess
import time

import redis
from bs4 import BeautifulSoup
from pubsublogger import publisher

from Helper import Process
from packages import Paste
|
||||
|
||||
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when a regex scan exceeds max_execution_time."""
    pass
|
||||
|
||||
def timeout_handler(signum, frame):
    # SIGALRM handler: abort a long-running regex scan (see extract_all_id)
    raise TimeoutException

# Install the alarm handler once, at import time.
signal.signal(signal.SIGALRM, timeout_handler)
|
||||
|
||||
def remove_html(item_content):
    """Return the plain text of *item_content*, stripping HTML if present.

    If the content contains no HTML markup it is returned unchanged.
    Fix: the original parsed the content with BeautifulSoup twice (once
    for the detection test, once for the extraction) — parse it once.
    """
    soup = BeautifulSoup(item_content, 'html.parser')
    if not soup.find():
        # no HTML tag detected: nothing to strip
        return item_content
    # kill all script and style elements before extracting the text
    for script in soup(["script", "style"]):
        script.extract()  # remove
    return soup.get_text()
|
||||
|
||||
def extract_all_id(item_content, regex):
    """Find every PGP block in *item_content* matching *regex*, run each
    through pgpdump, and collect the extracted ids.

    Results are accumulated in the module-level sets (set_key, set_name,
    set_mail) via extract_id_from_output(). The regex scan is bounded by
    the module-level max_execution_time (SIGALRM).
    """
    # max execution time on regex
    signal.alarm(max_execution_time)
    try:
        pgp_extracted_block = re.findall(regex, item_content)
    except TimeoutException:
        pgp_extracted_block = []
        p.incr_module_timeout_statistic()  # add encoder type
        print("{0} processing timeout".format(paste.p_rel_path))
    finally:
        # Fix: reset the alarm on EVERY exit path (the original reset it
        # only on success, so an unexpected exception could leave an armed
        # alarm that fires later in unrelated code).
        signal.alarm(0)

    for pgp_to_dump in pgp_extracted_block:
        pgp_packet = get_pgp_packet(pgp_to_dump)
        extract_id_from_output(pgp_packet)
|
||||
|
||||
def get_pgp_packet(save_path):
    """Run ``pgpdump`` on a PGP block and return its decoded text output.

    NOTE: *save_path* is actually the PGP block CONTENT, not a filesystem
    path (name kept for caller compatibility).

    Fix: the original piped the data through ``echo -e``, which interprets
    backslash escape sequences (corrupting any content containing ``\\``)
    and spawns a needless extra process. Write directly to pgpdump's stdin
    instead; this also avoids the unclosed pipe of the two-process chain.
    """
    process = subprocess.Popen(['pgpdump'],
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    output = process.communicate(input=save_path.encode())[0].decode()
    return output
|
||||
|
||||
|
||||
def extract_id_from_output(pgp_dump_outpout):
    """Parse pgpdump text output and collect ids into the module-level sets.

    User IDs of the form ``Name <mail>`` feed set_name and set_mail;
    bare user IDs feed set_name only; key ids feed set_key.
    (Parameter keeps the original 'outpout' spelling for caller compatibility.)
    """
    for user_id in set(re.findall(regex_user_id, pgp_dump_outpout)):
        user_id = user_id.replace(user_id_str, '', 1)
        if ' <' in user_id:
            # "Name <mail>" form: split off the mail and drop the trailing '>'
            name, mail = user_id.rsplit(' <', 1)
            set_mail.add(mail[:-1])
        else:
            name = user_id
        set_name.add(name)

    for key_id in set(re.findall(regex_key_id, pgp_dump_outpout)):
        set_key.add(key_id.replace(key_id_str, '', 1))
|
||||
|
||||
def save_pgp_data(type_pgp, date, item_path, data):
    """Persist one extracted PGP field (key/name/mail) into ARDB metadata.

    Updates first_seen/last_seen metadata, the global per-id item set,
    the daily counter, the all-time counter and the per-item set.
    """
    meta_key = 'pgp_metadata_{}:{}'.format(type_pgp, data)
    item_set_key = 'pgp_{}:{}'.format(type_pgp, data)

    # create basic metadata on first sighting of this id
    if not serv_metadata.exists(item_set_key):
        serv_metadata.hset(meta_key, 'first_seen', date)
        serv_metadata.hset(meta_key, 'last_seen', date)
    else:
        last_seen = serv_metadata.hget(meta_key, 'last_seen')
        # missing or older last_seen: bump it to this date
        if not last_seen or int(last_seen) < int(date):
            serv_metadata.hset(meta_key, 'last_seen', date)

    # global set: items in which this id was seen
    serv_metadata.sadd(item_set_key, item_path)

    # daily counter
    serv_metadata.hincrby('pgp:{}:{}'.format(type_pgp, date), data, 1)

    # all-time counter per type
    serv_metadata.zincrby('pgp_all:{}'.format(type_pgp), data, 1)

    # item metadata: ids found in this item
    serv_metadata.sadd('item_pgp_{}:{}'.format(type_pgp, item_path), data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    # (fix: removed the duplicate commented-out assignment)
    config_section = 'PgpDump'

    # Setup the I/O queues
    p = Process(config_section)

    serv_metadata = redis.StrictRedis(
        host=p.config.get("ARDB_Metadata", "host"),
        port=p.config.getint("ARDB_Metadata", "port"),
        db=p.config.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    # Sent to the logging a description of the module
    publisher.info("PgpDump started")

    # Markers used to locate ids in pgpdump's text output.
    user_id_str = 'User ID - '
    key_id_str = 'Key ID - '
    # Fix: the original threw away the results of five bare re.compile()
    # calls; keep the compiled patterns instead (re.findall accepts them),
    # and use raw strings so '\s' is not an invalid string escape.
    regex_user_id = re.compile(r'{}.+'.format(user_id_str))
    regex_key_id = re.compile(r'{}.+'.format(key_id_str))
    # PGP ASCII-armor blocks (non-greedy match across newlines)
    regex_pgp_public_blocs = re.compile(r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----')
    regex_pgp_signature = re.compile(r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----')
    regex_pgp_message = re.compile(r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----')

    max_execution_time = p.config.getint("PgpDump", "max_execution_time")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            # fix: 'time' was used here without being imported (NameError
            # whenever the queue ran empty) — import added at file top
            time.sleep(1)
            continue

        # Per-item result sets, filled by extract_id_from_output().
        set_key = set()
        set_name = set()
        set_mail = set()
        paste = Paste.Paste(message)

        # Do something with the message from the queue
        date = str(paste._get_p_date())
        content = paste.get_p_content()
        content = remove_html(content)

        extract_all_id(content, regex_pgp_public_blocs)
        extract_all_id(content, regex_pgp_signature)
        extract_all_id(content, regex_pgp_message)

        for key_id in set_key:
            print(key_id)
            save_pgp_data('key', date, message, key_id)

        for name_id in set_name:
            print(name_id)
            save_pgp_data('name', date, message, name_id)

        for mail_id in set_mail:
            print(mail_id)
            save_pgp_data('mail', date, message, mail_id)
|
|
@ -71,6 +71,9 @@ max_execution_time = 90
|
|||
[Onion]
|
||||
max_execution_time = 180
|
||||
|
||||
[PgpDump]
|
||||
max_execution_time = 60
|
||||
|
||||
[Base64]
|
||||
path = Base64/
|
||||
max_execution_time = 60
|
||||
|
|
|
@ -118,6 +118,10 @@ publish = Redis_Duplicate,Redis_Tags
|
|||
|
||||
[Keys]
|
||||
subscribe = Redis_Global
|
||||
publish = Redis_Duplicate,Redis_PgpDump,Redis_Tags
|
||||
|
||||
[PgpDump]
|
||||
subscribe = Redis_PgpDump
|
||||
publish = Redis_Duplicate,Redis_Tags
|
||||
|
||||
[ApiKey]
|
||||
|
|
Loading…
Reference in New Issue