From 3b499a2ec830ddd87d65053fd9acc9191d3898fe Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Tue, 26 Aug 2014 14:38:49 +0200 Subject: [PATCH 01/14] ZMQ Publisher removed ZMQ Publisher removed to allow concurrent use of the scripts. In short term, we would replace all publishing part within AIL into pub-sub Redis to avoid ZMQ limitation. --- bin/ZMQ_Sub_Urls.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/bin/ZMQ_Sub_Urls.py b/bin/ZMQ_Sub_Urls.py index 658d9c4a..889b7d01 100755 --- a/bin/ZMQ_Sub_Urls.py +++ b/bin/ZMQ_Sub_Urls.py @@ -26,11 +26,6 @@ if __name__ == "__main__": h = Helper.Redis_Queues(config_section, config_channel, subscriber_name) - # Publisher - pub_config_section = "PubSub_Url" - pub_config_channel = 'channel' - h.zmq_pub(pub_config_section, pub_config_channel) - # Subscriber h.zmq_sub(config_section) @@ -65,7 +60,7 @@ if __name__ == "__main__": port, resource_path, query_string, f1, f2, f3, \ f4 = x domains_list.append(domain) - h.zmq_pub_send(str(x)) + print (str(x)) publisher.debug('{} Published'.format(x)) if f1 == "onion": @@ -122,4 +117,4 @@ if __name__ == "__main__": message = h.redis_rpop() except dns.exception.Timeout: - print "dns.exception.Timeout", A_values + print "dns.exception.Timeout ", hostl From 1397db9691671f2be9b6a18cb575835378e3c7f9 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 8 Sep 2014 11:07:45 +0200 Subject: [PATCH 02/14] Global queue for DomainClassifier --- bin/ZMQ_Sub_DomainClassifier.py | 87 +++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100755 bin/ZMQ_Sub_DomainClassifier.py diff --git a/bin/ZMQ_Sub_DomainClassifier.py b/bin/ZMQ_Sub_DomainClassifier.py new file mode 100755 index 00000000..a0b65ec1 --- /dev/null +++ b/bin/ZMQ_Sub_DomainClassifier.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +""" +The ZMQ_Sub_DomainClassifier Module +============================ + +The ZMQ_Sub_DomainClassifier modules is fetching the list of files to be processed +and index each file with a full-text indexer (Whoosh until now). + +""" +import redis +import ConfigParser +import time +from packages import Paste +from packages import ZMQ_PubSub +from pubsublogger import publisher + +import DomainClassifier.domainclassifier +import os + +configfile = './packages/config.cfg' + + +def main(): + """Main Function""" + + # CONFIG # + cfg = ConfigParser.ConfigParser() + cfg.read(configfile) + + # Redis + r_serv1 = redis.StrictRedis( + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) + + # LOGGING # + publisher.channel = "Script" + + # ZMQ # + # Subscriber + channel = cfg.get("PubSub_Global", "channel") + subscriber_name = "DomainClassifier" + subscriber_config_section = "PubSub_Global" + + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + + # FUNCTIONS # + publisher.info("""ZMQ DomainClassifier is Running""") + c = DomainClassifier.domainclassifier.Extract(rawtext="") + + while True: + try: + message = sub.get_msg_from_queue(r_serv1) + + if message is not None: + PST = Paste.Paste(message.split(" ", -1)[-1]) + else: + if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"): + r_serv1.srem("SHUTDOWN_FLAGS", "Indexer") + publisher.warning("Shutdown Flag Up: Terminating.") + break + publisher.debug("Script DomainClassifier is idling 10s") + time.sleep(1) + continue + docpath = message.split(" ", -1)[-1] + paste = PST.get_p_content() + mimetype = PST._get_p_encoding() + if mimetype == "text/plain": + c.text(rawtext=paste) + c.potentialdomain() + c.validdomain(rtype=['A'],extended=True) + localizeddomains = c.include(expression=r'\.lu$') + if localizeddomains: + print (localizeddomains) + localizeddomains = c.localizedomain(cc='LU') + if localizeddomains: + print (localizeddomains) + except IOError: + print "CRC Checksum Failed on :", PST.p_path + publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name)) + pass + + +if __name__ == "__main__": + main() From 3055b0deae9e44a2b3336ef64bcb56b1ef2839c9 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 8 Sep 2014 11:52:34 +0200 Subject: [PATCH 03/14] DomainClassifier requirements added --- pip_packages_requirement.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pip_packages_requirement.txt b/pip_packages_requirement.txt index 75d5d866..c369d7c4 100644 --- a/pip_packages_requirement.txt +++ b/pip_packages_requirement.txt @@ -26,6 +26,8 @@ ipython flask texttable +#DomainClassifier +DomainClassifier #Indexer requirements whoosh From 246621f6638f88b73d61e8a803e649d88a315a2f Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 8 Sep 2014 16:43:21 +0200 Subject: [PATCH 04/14] First version of the DomainClassifier --- bin/ZMQ_Sub_DomainClassifier.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bin/ZMQ_Sub_DomainClassifier.py b/bin/ZMQ_Sub_DomainClassifier.py index a0b65ec1..e246d011 100755 --- a/bin/ZMQ_Sub_DomainClassifier.py +++ b/bin/ZMQ_Sub_DomainClassifier.py @@ -44,6 +44,9 @@ def main(): subscriber_name = "DomainClassifier" subscriber_config_section = "PubSub_Global" + cc = cfg.get("PubSub_DomainClassifier", "cc") + cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld") + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # @@ -71,10 +74,10 @@ def main(): c.text(rawtext=paste) c.potentialdomain() c.validdomain(rtype=['A'],extended=True) - localizeddomains = c.include(expression=r'\.lu$') + localizeddomains = c.include(expression=cc_tld) if localizeddomains: print (localizeddomains) - localizeddomains = c.localizedomain(cc='LU') + localizeddomains = c.localizedomain(cc=cc) if localizeddomains: print (localizeddomains) except IOError: From de6e21d5a774cf9efd96492c5d9db3b1d2df6f71 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 8 Sep 2014 16:44:05 +0200 Subject: [PATCH 05/14] DomainClassifier sample configuration added --- bin/packages/config.cfg.sample | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 2483e6c9..f274fc55 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -67,6 +67,10 @@ channel = urls # country code logged as critical cc_critical = DE +[PubSub_DomainClassifier] +cc = DE +cc_tld = r'\.de$' + # Indexer configuration [Indexer] type = whoosh From 27b134ec0311e5295bd5743ac1cff9049c220342 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Wed, 10 Sep 2014 09:27:47 +0200 Subject: [PATCH 06/14] Add proper publisher for classified domains/hostnames --- bin/ZMQ_Sub_DomainClassifier.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/ZMQ_Sub_DomainClassifier.py b/bin/ZMQ_Sub_DomainClassifier.py index e246d011..6ac18f88 100755 --- a/bin/ZMQ_Sub_DomainClassifier.py +++ b/bin/ZMQ_Sub_DomainClassifier.py @@ -76,10 +76,12 @@ def main(): c.validdomain(rtype=['A'],extended=True) localizeddomains = c.include(expression=cc_tld) if localizeddomains: - print (localizeddomains) + print(localizeddomains) + publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld)) localizeddomains = c.localizedomain(cc=cc) if localizeddomains: - print (localizeddomains) + print(localizeddomains) + publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc)) except IOError: print "CRC Checksum Failed on :", PST.p_path publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name)) From dfba5900db4c0056ae2b1e5e9e7146807e15c8d4 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Wed, 10 Sep 2014 16:38:58 +0200 Subject: [PATCH 07/14] Removed ultag --- var/www/templates/index.html | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/var/www/templates/index.html b/var/www/templates/index.html index f24662b4..fd6010b4 100644 --- a/var/www/templates/index.html +++ b/var/www/templates/index.html @@ -63,14 +63,7 @@
- -
- -

Anything in here will be replaced on browsers that support the canvas element

-
    -
-
-
+ From b331ff64929e48fa28f42f6c9b9603f1e711d002 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Wed, 10 Sep 2014 16:40:38 +0200 Subject: [PATCH 08/14] ultag removed --- var/www/static/js/indexjavascript.js | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index 1622f978..ba516b0b 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -255,17 +255,6 @@ $(document).ready(function () { refresh(); - var ultag = document.getElementById("ultag"); - for (i=0;i Date: Thu, 18 Sep 2014 13:28:25 +0200 Subject: [PATCH 09/14] Spelling/language corrections --- README.md | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 78eb2dd8..1aaca76e 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ AIL AIL framework - Analysis Information Leak framework -AIL is a modular framework to analyse potential information leak from unstructured data source like pastes from Pastebin or similar services. AIL framework is flexible and can be extended to support other functionalities to mine sensitive information. +AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services. AIL framework is flexible and can be extended to support other functionalities to mine sensitive information. ![Dashboard](./doc/screenshots/DashboardAIL.png?raw=true "AIL framework dashboard") ![Trending](./doc/screenshots/WordtrendingAIL.png?raw=true "AIL framework wordtrending") @@ -13,7 +13,7 @@ Requirements & Installation Auto installation ----------------- -Type these commands lines for a fully automated installation and start AIL-framework +Type these command lines for a fully automated installation and start AIL framework ``` git clone https://github.com/CIRCL/AIL-framework.git cd AIL-framework @@ -28,10 +28,10 @@ cd bin/ Manual installation ------------------- -Obviously: +As AIL is based on python, obviously an installation of python is a requirement: ``sudo apt-get install python2.7`` -But also pip, virtualenv and screen. +In addition pip, virtualenv and screen are needed: ``` sudo apt-get install python-pip sudo pip install virtualenv @@ -80,7 +80,7 @@ And install it: python setup.py install ``` -That's all the packages you can install with pip: +These are all the packages you can install with pip: ``` pip install redis @@ -167,34 +167,33 @@ cd var/www/ Flask_server.py ``` -Then you can browse the status of the AIL framework at the following URL: +Eventually you can browse the status of the AIL framework at the following URL: ``http://localhost:7000/`` Create a new module ------------------- -Assuming you already download the project and configure everything: +Assuming you already downloaded the project and configured everything: * Redis databases [http://redis.io/] * Redis Level DB [https://github.com/KDr2/redis-leveldb] -This module will be recover from a stream all the Tor .onion addresses: -"http://3g2upl4pq6kufc4m.onion/" Which look like this. +This module will recover from a streams all the Tor .onion addresses, which look like this: +"http://3g2upl4pq6kufc4m.onion/" -Basically we want to match all paste in with ``.onion`` addresses inside. +Basically we want to match all pastes in with ``.onion`` addresses inside. For that you can already use the module ``ZMQ_PubSub_Categ`` and just create your own category file in: ``/file/`` here it will be ``/file/onion_categ``. -You need also to link this file inside another file (list_categ_files). +You also need to link this file inside another file (list_categ_files). Inside the file "onion_categ", you will add the word "onion" (don't forget the carriage return). -Once it's done, at the launch of the AIL framework, every paste with the word onion inside them -will be forwarded on a specific channel (onion_categ). +Once it's done, after the launch of AIL framework, every paste with the word onion inside will be forwarded on a specific channel (onion_categ). -Then what you want to do it's recovering these pastes to extract these .onion addresses. +Then what you want to do is to identify these pastes to extract the .onion addresses. To do that, you'll need to create 2 scripts: ``ZMQ_Sub_Onion_Q.py`` (Redis bufferizing) @@ -205,14 +204,14 @@ Those two files are there as an example. Overview -------- -Here is a "chained tree" to show how all ZMQ Modules are linked and how the informations -(mainly the paste) is going through them. +Here is a "chained tree" to show how all ZMQ Modules that are linked and how the information +(mainly the paste) is flowing between them. The onion module is interfaced at top down level of this tree (like the ZMQ_Sub_Urls module). All modules that you want to create using the "tokenization method" and the "categories system" need to be created at this level. -If you want to create a general module (e.g. using all pastes), this module need to be created at the same level than ZMQ_Sub_Duplicate. +If you want to create a general module (e.g. using all pastes), this module needs to be created at the same level than ZMQ_Sub_Duplicate. ![ZMQTree](./doc/dia/ZMQ_Queuing_Tree.jpg?raw=true "ZMQ Tree") @@ -230,7 +229,7 @@ LICENSE ``` Copyright (C) 2014 Jules Debra - Copyright (C) 2014 CIRCL - Computer Incident Response Center Luxembourg (℅ smile gie) + Copyright (C) 2014 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by From 25757b0fffc284dd01578279881cbc0c90b62752 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Fri, 19 Sep 2014 14:03:05 +0200 Subject: [PATCH 10/14] A simple feeder script feeding data from pystemon to AIL. The configuration matches the default Redis parameters used in the pystemon configuration. https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 --- bin/feeder/pystemon-feeder.py | 50 ++++++++++++++++++++++++++++++++++ bin/packages/config.cfg.sample | 2 ++ 2 files changed, 52 insertions(+) create mode 100644 bin/feeder/pystemon-feeder.py diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py new file mode 100644 index 00000000..1a9088b3 --- /dev/null +++ b/bin/feeder/pystemon-feeder.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of AIL framework - Analysis Information Leak framework +# +# This a simple feeder script feeding data from pystemon to AIL. +# +# Don't forget to set your pystemonpath and ensure that the +# configuration matches this script. Default is Redis DB 10. +# +# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be + + +import zmq +import random +import sys +import time +import redis +import base64 + +port = "5556" +pystemonpath = "/home/pystemon/pystemon/" + +context = zmq.Context() +socket = context.socket(zmq.PUB) +socket.bind("tcp://*:%s" % port) + +# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 +r = redis.StrictRedis(host='localhost', db=10) + +# 101 pastes processed feed +# 102 raw pastes feed + +while True: + time.sleep(1) + topic = 101 + paste = r.lpop("pastes") + if paste is None: + continue + socket.send("%d %s" % (topic, paste)) + topic = 102 + messagedata = open(pystemonpath+paste).read() + socket.send("%d %s %s" % (topic, paste, base64.b64encode(messagedata))) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index f274fc55..76e23faa 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -38,6 +38,8 @@ db = 1 # PUB / SUB : ZMQ [Feed] +# if you use the pystemon-feeder.py change the configuration +# where the feeder is listening. Usually it's 127.0.0.1:5556 adress = tcp://crf.circl.lu:5556 topicfilter = 102 From 8c64f413928c9f0935b889e13fc9c33833546039 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 22 Sep 2014 13:35:46 +0200 Subject: [PATCH 11/14] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 1aaca76e..94b1ce43 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ AIL is a modular framework to analyse potential information leaks from unstructu ![Dashboard](./doc/screenshots/DashboardAIL.png?raw=true "AIL framework dashboard") ![Trending](./doc/screenshots/WordtrendingAIL.png?raw=true "AIL framework wordtrending") +![AIL framework screencast](https://www.youtube.com/watch?v=9idfHCIMzBY) + Requirements & Installation --------------------------- From 3c6b03b3faf9b954c7cdbc35673b20502dc14032 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 22 Sep 2014 13:36:13 +0200 Subject: [PATCH 12/14] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 94b1ce43..c579caa2 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ AIL is a modular framework to analyse potential information leaks from unstructu ![Dashboard](./doc/screenshots/DashboardAIL.png?raw=true "AIL framework dashboard") ![Trending](./doc/screenshots/WordtrendingAIL.png?raw=true "AIL framework wordtrending") -![AIL framework screencast](https://www.youtube.com/watch?v=9idfHCIMzBY) +AIL framework screencast: https://www.youtube.com/watch?v=9idfHCIMzBY Requirements & Installation --------------------------- From 30a2a8917e868468f326cab712a68b8c0a4ec304 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Mon, 1 Dec 2014 10:43:36 +0100 Subject: [PATCH 13/14] Update README.md Framework for Analysis of Information Leaks --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c579caa2..e316be40 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ AIL === -AIL framework - Analysis Information Leak framework +AIL framework - Framework for Analysis of Information Leaks AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services. AIL framework is flexible and can be extended to support other functionalities to mine sensitive information. From 3dd9c857916180edc5b713410d02637e22f8316e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 9 Dec 2014 14:57:36 +0100 Subject: [PATCH 14/14] Add contributors --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e316be40..d7d54f4e 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,8 @@ LICENSE ``` Copyright (C) 2014 Jules Debra Copyright (C) 2014 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique) + Copyright (c) 2014 Raphaël Vinot + Copyright (c) 2014 Alexandre Dulaunoy This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by