chg: [PgpDump] add PgpDump backend

TODO: UI
pull/422/head
Terrtia 2019-05-14 17:49:31 +02:00
parent 0389b9c23b
commit 6480744641
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
5 changed files with 254 additions and 0 deletions

View File

@ -152,6 +152,56 @@ Redis and ARDB overview
| binary_hash:**hash** | **item** | **nb_seen** |
| hexadecimal_hash:**hash** | **item** | **nb_seen** |
#### PgpDump
##### Hset:
| Key | Field | Value |
| ------ | ------ | ------ |
| pgp_metadata_key:*key id* | first_seen | **date** |
| | last_seen | **date** |
| | |
| pgp_metadata_name:*name* | first_seen | **date** |
| | last_seen | **date** |
| | |
| pgp_metadata_mail:*mail* | first_seen | **date** |
| | last_seen | **date** |
##### set:
| Key | Value |
| ------ | ------ |
| pgp_key:*key id* | *item_path* |
| | |
| pgp_name:*name* | *item_path* |
| | |
| pgp_mail:*mail* | *item_path* |
##### Hset date:
| Key | Field | Value |
| ------ | ------ | ------ |
| pgp:key:*date* | *key* | *nb seen* |
| | |
| pgp:name:*date* | *name* | *nb seen* |
| | |
| pgp:mail:*date* | *mail* | *nb seen* |
##### zset:
| Key | Field | Value |
| ------ | ------ | ------ |
| pgp_all:key | *key* | *nb seen* |
| | |
| pgp_all:name | *name* | *nb seen* |
| | |
| pgp_all:mail | *mail* | *nb seen* |
##### set:
| Key | Value |
| ------ | ------ |
| item_pgp_key:*item_path* | *key* |
| | |
| item_pgp_name:*item_path* | *name* |
| | |
| item_pgp_mail:*item_path* | *mail* |
## DB9 - Crawler:
##### Hset:

View File

@ -108,6 +108,15 @@ def search_key(paste):
p.populate_set_out(msg, 'Tags')
find = True
if '-----BEGIN PGP PUBLIC KEY BLOCK-----' in content:
p.populate_set_out(message, 'PgpDump')
if '-----BEGIN PGP SIGNATURE-----' in content:
p.populate_set_out(message, 'PgpDump')
if '-----BEGIN PGP MESSAGE-----' in content:
p.populate_set_out(message, 'PgpDump')
if find :
#Send to duplicate

188
bin/PgpDump.py Executable file
View File

@ -0,0 +1,188 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
PgpDum module
Extract ID from PGP Blocks
"""
import datetime
import os
import re
import signal
import subprocess
import time

import redis
from bs4 import BeautifulSoup
from pubsublogger import publisher

from Helper import Process
from packages import Paste
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when a regex scan exceeds its budget."""


def timeout_handler(signum, frame):
    """SIGALRM callback: abort the in-progress regex scan."""
    raise TimeoutException


# Install the handler once at import time; extract_all_id() arms the alarm.
signal.signal(signal.SIGALRM, timeout_handler)
def remove_html(item_content):
    """Strip HTML markup from *item_content*.

    If the content contains at least one HTML tag, return its visible text
    with <script>/<style> elements removed; otherwise return the content
    unchanged.

    Fix: the original parsed the content with BeautifulSoup twice (once to
    test for a tag, once to extract) — parse it a single time instead.
    """
    soup = BeautifulSoup(item_content, 'html.parser')
    if not soup.find():
        # No HTML tag at all: plain-text item, nothing to strip.
        return item_content
    # kill all script and style elements — their text is never user content
    for script in soup(["script", "style"]):
        script.extract()  # remove
    # get text
    return soup.get_text()
def extract_all_id(item_content, regex):
    """Find every PGP armored block in *item_content* matching *regex*,
    run each through pgpdump, and collect the extracted ids into the
    module-level result sets (set_key / set_name / set_mail) via
    extract_id_from_output().

    The findall() is bounded by a SIGALRM of max_execution_time seconds
    (module-level config) to guard against pathological regex runtimes.
    Relies on module globals: max_execution_time, p, paste.
    """
    # max execution time on regex
    signal.alarm(max_execution_time)
    try:
        pgp_extracted_block = re.findall(regex, item_content)
    except TimeoutException:
        pgp_extracted_block = []
        p.incr_module_timeout_statistic() # record the timeout in module stats
        print ("{0} processing timeout".format(paste.p_rel_path))
    else:
        signal.alarm(0)  # cancel the pending alarm on success
    # empty on timeout, so the loop is a no-op in that case
    for pgp_to_dump in pgp_extracted_block:
        pgp_packet = get_pgp_packet(pgp_to_dump)
        extract_id_from_output(pgp_packet)
def get_pgp_packet(save_path):
    """Run the external `pgpdump` binary over one PGP armored block and
    return its textual stdout.

    *save_path* is (despite the name, kept for interface compatibility) the
    armored block content itself.

    Fix: the original piped the data through `echo -e`, which is fragile —
    argv length limits, non-portable `-e` handling, and blocks starting with
    a dash being read as options. Write the block straight to pgpdump's
    stdin instead.
    """
    process = subprocess.Popen(['pgpdump'],
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    output = process.communicate(input=save_path.encode())[0].decode()
    return output
def extract_id_from_output(pgp_dump_outpout):
    """Parse pgpdump textual output and record every user id (split into
    name / mail) and every key id into the module-level result sets
    set_name, set_mail and set_key."""
    # User id lines look like: "User ID - Some Name <mail@example.com>"
    for user_id in set(re.findall(regex_user_id, pgp_dump_outpout)):
        user_id = user_id.replace(user_id_str, '', 1)
        if ' <' in user_id:
            name, mail = user_id.rsplit(' <', 1)
            set_name.add(name)
            set_mail.add(mail[:-1])  # drop the trailing '>'
        else:
            # no mail part: whole remainder is the name
            set_name.add(user_id)

    # Key id lines look like: "Key ID - 0x1234ABCD5678EF90"
    for key_id in set(re.findall(regex_key_id, pgp_dump_outpout)):
        set_key.add(key_id.replace(key_id_str, '', 1))
def save_pgp_data(type_pgp, date, item_path, data):
    """Persist one extracted PGP identifier into the ARDB metadata DB.

    type_pgp  -- identifier kind: 'key', 'name' or 'mail'
    date      -- item date as a YYYYMMDD string
    item_path -- path/id of the item the identifier was found in
    data      -- the identifier value itself

    Writes first/last-seen metadata, the global per-identifier item set,
    a daily counter, an all-time counter, and a per-item reverse index.
    """
    # create basic metadata
    # NOTE(review): this exists() checks 'pgp_<type>:<data>' (the set filled
    # below on every call) while the metadata lives under
    # 'pgp_metadata_<type>:<data>' — it behaves as a "seen before" test, but
    # the key mismatch looks unintentional; confirm against the schema doc.
    if not serv_metadata.exists('pgp_{}:{}'.format(type_pgp, data)):
        serv_metadata.hset('pgp_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date)
        serv_metadata.hset('pgp_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
    else:
        last_seen = serv_metadata.hget('pgp_metadata_{}:{}'.format(type_pgp, data), 'last_seen')
        if not last_seen:
            serv_metadata.hset('pgp_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
        else:
            # dates are YYYYMMDD strings, so integer compare preserves order
            if int(last_seen) < int(date):
                serv_metadata.hset('pgp_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
    # global set: all items where this identifier was seen
    serv_metadata.sadd('pgp_{}:{}'.format(type_pgp, data), item_path)
    # daily counter per identifier
    serv_metadata.hincrby('pgp:{}:{}'.format(type_pgp, date), data, 1)
    # all-time counter per identifier type
    serv_metadata.zincrby('pgp_all:{}'.format(type_pgp), data, 1)
    # item_metadata: reverse index, identifiers found in this item
    serv_metadata.sadd('item_pgp_{}:{}'.format(type_pgp, item_path), data)
if __name__ == '__main__':
    # If you wish to use an other port of channel, do not forget to run a
    # subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'PgpDump'

    # Setup the I/O queues
    p = Process(config_section)

    # ARDB metadata DB (first/last seen, counters, reverse indexes)
    serv_metadata = redis.StrictRedis(
        host=p.config.get("ARDB_Metadata", "host"),
        port=p.config.getint("ARDB_Metadata", "port"),
        db=p.config.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    # Sent to the logging a description of the module
    publisher.info("PgpDump started")

    # Markers pgpdump prints in front of the ids we extract.
    user_id_str = 'User ID - '
    key_id_str = 'Key ID - '

    # Compile the patterns once and KEEP the compiled objects (the original
    # called re.compile() but discarded its result). re.findall() accepts
    # compiled patterns, so the helper functions work unchanged. Raw strings
    # avoid invalid-escape warnings for \s/\S.
    regex_user_id = re.compile('{}.+'.format(user_id_str))
    regex_key_id = re.compile('{}.+'.format(key_id_str))
    regex_pgp_public_blocs = re.compile(r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----')
    regex_pgp_signature = re.compile(r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----')
    regex_pgp_message = re.compile(r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----')

    max_execution_time = p.config.getint("PgpDump", "max_execution_time")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)  # needs `import time` at the top of the file
            continue

        # Result sets are module globals so the helpers can fill them;
        # reset for every item.
        set_key = set()
        set_name = set()
        set_mail = set()

        paste = Paste.Paste(message)
        date = str(paste._get_p_date())

        content = remove_html(paste.get_p_content())

        # Scan for each kind of armored PGP block.
        extract_all_id(content, regex_pgp_public_blocs)
        extract_all_id(content, regex_pgp_signature)
        extract_all_id(content, regex_pgp_message)

        # Persist everything that was collected for this item.
        for key_id in set_key:
            print(key_id)
            save_pgp_data('key', date, message, key_id)
        for name_id in set_name:
            print(name_id)
            save_pgp_data('name', date, message, name_id)
        for mail_id in set_mail:
            print(mail_id)
            save_pgp_data('mail', date, message, mail_id)

View File

@ -71,6 +71,9 @@ max_execution_time = 90
[Onion]
max_execution_time = 180
[PgpDump]
max_execution_time = 60
[Base64]
path = Base64/
max_execution_time = 60

View File

@ -118,6 +118,10 @@ publish = Redis_Duplicate,Redis_Tags
[Keys]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_PgpDump,Redis_Tags
[PgpDump]
subscribe = Redis_PgpDump
publish = Redis_Duplicate,Redis_Tags
[ApiKey]