2019-05-14 17:49:31 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
|
|
|
PgpDum module
|
|
|
|
|
|
|
|
Extract ID from PGP Blocks
|
|
|
|
"""
|
|
|
|
|
|
|
|
import os
|
|
|
|
import re
|
2019-05-20 14:32:07 +02:00
|
|
|
import time
|
2019-06-05 16:20:26 +02:00
|
|
|
import uuid
|
2019-05-14 17:49:31 +02:00
|
|
|
import redis
|
|
|
|
import signal
|
|
|
|
import datetime
|
|
|
|
import subprocess
|
|
|
|
|
|
|
|
from pubsublogger import publisher
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
from Helper import Process
|
|
|
|
from packages import Paste
|
|
|
|
|
|
|
|
class TimeoutException(Exception):
|
|
|
|
pass
|
|
|
|
|
|
|
|
def timeout_handler(signum, frame):
|
|
|
|
raise TimeoutException
|
|
|
|
|
|
|
|
signal.signal(signal.SIGALRM, timeout_handler)
|
|
|
|
|
2019-06-05 16:20:26 +02:00
|
|
|
# save pgp message in directory, process one time each day
|
|
|
|
def save_in_file(message, pgp_content):
|
|
|
|
print('save in file')
|
|
|
|
UUID = str(uuid.uuid4())
|
|
|
|
file_name = os.path.join(pgp_dump_dir_to_process, UUID)
|
|
|
|
with open(file_name, 'w') as f:
|
|
|
|
f.write(pgp_content)
|
|
|
|
r_serv_db.sadd('pgpdumb:uuid', '{};{}'.format(UUID, message))
|
|
|
|
|
2019-05-14 17:49:31 +02:00
|
|
|
def remove_html(item_content):
|
|
|
|
if bool(BeautifulSoup(item_content, "html.parser").find()):
|
|
|
|
soup = BeautifulSoup(item_content, 'html.parser')
|
|
|
|
# kill all script and style elements
|
|
|
|
for script in soup(["script", "style"]):
|
|
|
|
script.extract() # remove
|
|
|
|
|
|
|
|
# get text
|
|
|
|
text = soup.get_text()
|
|
|
|
return text
|
|
|
|
else:
|
|
|
|
return item_content
|
|
|
|
|
2019-06-05 16:20:26 +02:00
|
|
|
def extract_all_id(message, item_content, regex=None, is_file=False):
|
2019-05-14 17:49:31 +02:00
|
|
|
|
2019-06-05 16:20:26 +02:00
|
|
|
if is_file:
|
|
|
|
pgp_packet = get_pgp_packet_file(item_content)
|
2019-05-14 17:49:31 +02:00
|
|
|
extract_id_from_output(pgp_packet)
|
|
|
|
|
2019-06-05 16:20:26 +02:00
|
|
|
else:
|
|
|
|
# max execution time on regex
|
|
|
|
signal.alarm(max_execution_time)
|
|
|
|
try:
|
|
|
|
pgp_extracted_block = re.findall(regex, item_content)
|
|
|
|
except TimeoutException:
|
|
|
|
pgp_extracted_block = []
|
|
|
|
p.incr_module_timeout_statistic() # add encoder type
|
|
|
|
print ("{0} processing timeout".format(paste.p_rel_path))
|
|
|
|
else:
|
|
|
|
signal.alarm(0)
|
|
|
|
|
|
|
|
for pgp_to_dump in pgp_extracted_block:
|
|
|
|
pgp_packet = get_pgp_packet(message, pgp_to_dump)
|
|
|
|
extract_id_from_output(pgp_packet)
|
|
|
|
|
|
|
|
def get_pgp_packet(message, save_path):
|
2019-05-14 17:49:31 +02:00
|
|
|
save_path = '{}'.format(save_path)
|
2019-05-24 14:10:06 +02:00
|
|
|
if len(save_path) > 131072:
|
2019-06-05 16:20:26 +02:00
|
|
|
save_in_file(message, save_path)
|
|
|
|
return ''
|
|
|
|
else:
|
|
|
|
process1 = subprocess.Popen([ 'echo', '-e', save_path], stdout=subprocess.PIPE)
|
|
|
|
process2 = subprocess.Popen([ 'pgpdump'], stdin=process1.stdout, stdout=subprocess.PIPE)
|
|
|
|
process1.stdout.close()
|
2019-06-12 10:33:40 +02:00
|
|
|
output = process2.communicate()[0]
|
|
|
|
try:
|
|
|
|
output = output.decode()
|
|
|
|
except UnicodeDecodeError:
|
|
|
|
publisher.error('Error PgpDump UnicodeDecodeError: {}'.format(message))
|
|
|
|
output = ''
|
2019-06-05 16:20:26 +02:00
|
|
|
return output
|
|
|
|
|
|
|
|
def get_pgp_packet_file(file):
|
|
|
|
process1 = subprocess.Popen([ 'pgpdump', file], stdout=subprocess.PIPE)
|
|
|
|
output = process1.communicate()[0].decode()
|
2019-05-14 17:49:31 +02:00
|
|
|
return output
|
|
|
|
|
|
|
|
def extract_id_from_output(pgp_dump_outpout):
|
|
|
|
all_user_id = set(re.findall(regex_user_id, pgp_dump_outpout))
|
|
|
|
for user_id in all_user_id:
|
2019-05-20 11:57:49 +02:00
|
|
|
# avoid key injection in user_id:
|
|
|
|
pgp_dump_outpout.replace(user_id, '', 1)
|
|
|
|
|
2019-05-14 17:49:31 +02:00
|
|
|
user_id = user_id.replace(user_id_str, '', 1)
|
|
|
|
mail = None
|
|
|
|
if ' <' in user_id:
|
|
|
|
name, mail = user_id.rsplit(' <', 1)
|
|
|
|
mail = mail[:-1]
|
|
|
|
set_name.add(name)
|
|
|
|
set_mail.add(mail)
|
|
|
|
else:
|
|
|
|
name = user_id
|
|
|
|
set_name.add(name)
|
|
|
|
|
|
|
|
all_key_id = set(re.findall(regex_key_id, pgp_dump_outpout))
|
|
|
|
for key_id in all_key_id:
|
|
|
|
key_id = key_id.replace(key_id_str, '', 1)
|
|
|
|
set_key.add(key_id)
|
|
|
|
|
|
|
|
def save_pgp_data(type_pgp, date, item_path, data):
|
|
|
|
# create basic medata
|
2019-05-24 12:02:43 +02:00
|
|
|
if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)):
|
|
|
|
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date)
|
|
|
|
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
|
2019-05-14 17:49:31 +02:00
|
|
|
else:
|
2019-05-24 12:02:43 +02:00
|
|
|
last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen')
|
2019-05-14 17:49:31 +02:00
|
|
|
if not last_seen:
|
2019-05-24 12:02:43 +02:00
|
|
|
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
|
2019-05-14 17:49:31 +02:00
|
|
|
else:
|
|
|
|
if int(last_seen) < int(date):
|
2019-05-24 12:02:43 +02:00
|
|
|
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
|
2019-05-14 17:49:31 +02:00
|
|
|
|
|
|
|
# global set
|
2019-05-24 12:02:43 +02:00
|
|
|
serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)
|
2019-05-14 17:49:31 +02:00
|
|
|
|
|
|
|
# daily
|
2019-05-24 12:02:43 +02:00
|
|
|
serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)
|
2019-05-14 17:49:31 +02:00
|
|
|
|
|
|
|
# all type
|
2019-05-24 12:02:43 +02:00
|
|
|
serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)
|
2019-05-14 17:49:31 +02:00
|
|
|
|
|
|
|
# item_metadata
|
2019-05-24 12:02:43 +02:00
|
|
|
serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)
|
2019-05-14 17:49:31 +02:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
|
|
|
# Port of the redis instance used by pubsublogger
|
|
|
|
publisher.port = 6380
|
|
|
|
# Script is the default channel used for the modules.
|
|
|
|
publisher.channel = 'Script'
|
|
|
|
|
|
|
|
# Section name in bin/packages/modules.cfg
|
|
|
|
#config_section = 'PgpDump'
|
|
|
|
config_section = 'PgpDump'
|
|
|
|
|
|
|
|
# Setup the I/O queues
|
|
|
|
p = Process(config_section)
|
|
|
|
|
2019-06-05 16:20:26 +02:00
|
|
|
r_serv_db = redis.StrictRedis(
|
|
|
|
host=p.config.get("ARDB_DB", "host"),
|
|
|
|
port=p.config.getint("ARDB_DB", "port"),
|
|
|
|
db=p.config.getint("ARDB_DB", "db"),
|
|
|
|
decode_responses=True)
|
|
|
|
|
2019-05-14 17:49:31 +02:00
|
|
|
serv_metadata = redis.StrictRedis(
|
|
|
|
host=p.config.get("ARDB_Metadata", "host"),
|
|
|
|
port=p.config.getint("ARDB_Metadata", "port"),
|
|
|
|
db=p.config.getint("ARDB_Metadata", "db"),
|
|
|
|
decode_responses=True)
|
|
|
|
|
|
|
|
# Sent to the logging a description of the module
|
|
|
|
publisher.info("PgpDump started")
|
|
|
|
|
2019-06-05 16:20:26 +02:00
|
|
|
# check/create pgpdump queue directory (used for huge pgp blocks)
|
|
|
|
pgp_dump_dir_to_process = os.path.join(os.environ['AIL_HOME'], 'temp', 'pgpdump')
|
|
|
|
if not os.path.isdir(pgp_dump_dir_to_process):
|
|
|
|
os.makedirs(pgp_dump_dir_to_process)
|
|
|
|
|
2019-05-14 17:49:31 +02:00
|
|
|
user_id_str = 'User ID - '
|
|
|
|
regex_user_id= '{}.+'.format(user_id_str)
|
|
|
|
|
|
|
|
key_id_str = 'Key ID - '
|
|
|
|
regex_key_id = '{}.+'.format(key_id_str)
|
|
|
|
regex_pgp_public_blocs = '-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
|
|
|
|
regex_pgp_signature = '-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
|
|
|
|
regex_pgp_message = '-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
|
|
|
|
|
|
|
|
re.compile(regex_user_id)
|
|
|
|
re.compile(regex_key_id)
|
|
|
|
re.compile(regex_pgp_public_blocs)
|
|
|
|
re.compile(regex_pgp_signature)
|
|
|
|
re.compile(regex_pgp_message)
|
|
|
|
|
|
|
|
max_execution_time = p.config.getint("PgpDump", "max_execution_time")
|
|
|
|
|
|
|
|
# Endless loop getting messages from the input queue
|
|
|
|
while True:
|
|
|
|
|
2019-06-05 16:20:26 +02:00
|
|
|
is_file = False
|
2019-05-14 17:49:31 +02:00
|
|
|
set_key = set()
|
|
|
|
set_name = set()
|
|
|
|
set_mail = set()
|
|
|
|
|
2019-06-05 16:20:26 +02:00
|
|
|
if r_serv_db.scard('pgpdumb:uuid') > 0:
|
|
|
|
res = r_serv_db.spop('pgpdumb:uuid')
|
|
|
|
file_to_process, message = res.split(';', 1)
|
|
|
|
file_to_process = os.path.join(pgp_dump_dir_to_process, file_to_process)
|
|
|
|
date = datetime.datetime.now().strftime("%Y/%m/%d")
|
|
|
|
paste = Paste.Paste(message)
|
|
|
|
date = str(paste._get_p_date())
|
|
|
|
print(message)
|
|
|
|
extract_all_id(message, file_to_process, is_file=True)
|
|
|
|
os.remove(file_to_process)
|
|
|
|
|
|
|
|
else:
|
|
|
|
# Get one message from the input queue
|
|
|
|
message = p.get_from_set()
|
|
|
|
|
|
|
|
if message is None:
|
|
|
|
publisher.debug("{} queue is empty, waiting".format(config_section))
|
|
|
|
time.sleep(1)
|
|
|
|
continue
|
|
|
|
|
|
|
|
paste = Paste.Paste(message)
|
|
|
|
|
|
|
|
date = str(paste._get_p_date())
|
|
|
|
content = paste.get_p_content()
|
|
|
|
content = remove_html(content)
|
|
|
|
|
|
|
|
print(message)
|
2019-05-14 17:49:31 +02:00
|
|
|
|
2019-06-05 16:20:26 +02:00
|
|
|
extract_all_id(message, content, regex_pgp_public_blocs)
|
|
|
|
extract_all_id(message, content, regex_pgp_signature)
|
|
|
|
extract_all_id(message, content, regex_pgp_message)
|
2019-05-14 17:49:31 +02:00
|
|
|
|
|
|
|
for key_id in set_key:
|
|
|
|
print(key_id)
|
|
|
|
save_pgp_data('key', date, message, key_id)
|
|
|
|
|
|
|
|
for name_id in set_name:
|
|
|
|
print(name_id)
|
|
|
|
save_pgp_data('name', date, message, name_id)
|
|
|
|
|
|
|
|
for mail_id in set_mail:
|
|
|
|
print(mail_id)
|
|
|
|
save_pgp_data('mail', date, message, mail_id)
|