diff --git a/README.md b/README.md index 372aedd3..772c9bde 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ Terms manager and occurence ![Term-Manager](./doc/screenshots/terms-manager.png?raw=true "AIL framework termManager") -## Top terms +### Top terms ![Term-Top](./doc/screenshots/terms-top.png?raw=true "AIL framework termTop") ![Term-Plot](./doc/screenshots/terms-plot.png?raw=true "AIL framework termPlot") @@ -108,6 +108,10 @@ Terms manager and occurence [AIL framework screencast](https://www.youtube.com/watch?v=1_ZrZkRKmNo) +Command line module manager +--------------------------- + +![Module-Manager](./doc/screenshots/module-manager.png?raw=true "AIL framework ModuleInformationV2.py") License ======= diff --git a/bin/Attributes.py b/bin/Attributes.py index a7f78696..66e22f39 100755 --- a/bin/Attributes.py +++ b/bin/Attributes.py @@ -5,25 +5,7 @@ The ZMQ_Sub_Attribute Module ============================ -This module is consuming the Redis-list created by the ZMQ_PubSub_Line_Q Module - -It perform a sorting on the line's length and publish/forward them to -differents channels: - -*Channel 1 if max length(line) < max -*Channel 2 if max length(line) > max - -The collected informations about the processed pastes -(number of lines and maximum length line) are stored in Redis. - -..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put -the same Subscriber name in both of them. - -Requirements ------------- - -*Need running Redis instances. (LevelDB & Redis) -*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly. +This module saves the attributes of the paste into Redis """ import time diff --git a/bin/Credential.py b/bin/Credential.py index 8c62f34a..ff8f8f97 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -1,5 +1,16 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* + +""" +The Credential Module +===================== + +This module is consuming the Redis-list created by the Categ module. 
+ +It applies credential regexes on paste content and warns if above a threshold. + +""" + import time import sys from packages import Paste diff --git a/bin/CreditCards.py b/bin/CreditCards.py index 6c9bf9c1..79442576 100755 --- a/bin/CreditCards.py +++ b/bin/CreditCards.py @@ -1,5 +1,17 @@ #!/usr/bin/env python # -*-coding:UTF-8 -* + +""" +The CreditCards Module +====================== + +This module is consuming the Redis-list created by the Categ module. + +It applies credit card regexes on paste content and warns if above a threshold. + +""" + + import pprint import time from packages import Paste @@ -7,7 +19,6 @@ from packages import lib_refine from pubsublogger import publisher import re - from Helper import Process if __name__ == "__main__": diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py index 562705cf..eea46a8c 100755 --- a/bin/CurveManageTopSets.py +++ b/bin/CurveManageTopSets.py @@ -5,14 +5,6 @@ This module manage top sets for terms frequency. Every 'refresh_rate' update the weekly and monthly set - -Requirements ------------- - -*Need running Redis instances. (Redis) -*Categories files of words in /files/ need to be created -*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly. - """ import redis diff --git a/bin/Cve.py b/bin/Cve.py index 97e5aaae..fb4b0b24 100755 --- a/bin/Cve.py +++ b/bin/Cve.py @@ -1,7 +1,13 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* """ - Template for new modules +The CVE Module +====================== + +This module is consuming the Redis-list created by the Categ module. + +It applies CVE regexes on paste content and warns if a reference to a CVE is spotted. 
+ """ import time diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py index 74522917..c205cb01 100755 --- a/bin/DomClassifier.py +++ b/bin/DomClassifier.py @@ -5,8 +5,8 @@ The DomClassifier Module ============================ -The DomClassifier modules is fetching the list of files to be -processed and index each file with a full-text indexer (Whoosh until now). +The DomClassifier module extracts and classifies Internet domains/hostnames/IP addresses from +the output of the Global module. """ import time diff --git a/bin/Keys.py b/bin/Keys.py index a286dada..d2e7ebd2 100755 --- a/bin/Keys.py +++ b/bin/Keys.py @@ -1,7 +1,14 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* + """ - Template for new modules +The Keys Module +====================== + +This module is consuming the Redis-list created by the Global module. + +It is looking for PGP encrypted messages + """ import time diff --git a/bin/Mail.py b/bin/Mail.py index 6ec938f3..99dd6948 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -1,6 +1,16 @@ #!/usr/bin/env python # -*-coding:UTF-8 -* +""" +The Mail Module +====================== + +This module is consuming the Redis-list created by the Categ module. + +It applies mail regexes on paste content and warns if above a threshold. + +""" + import redis import pprint import time diff --git a/bin/Mixer.py b/bin/Mixer.py index 266eada3..40614253 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -1,8 +1,8 @@ #!/usr/bin/env python # -*-coding:UTF-8 -* """ -The ZMQ_Feed_Q Module -===================== +The Mixer Module +================ This module is consuming the Redis-list created by the ZMQ_Feed_Q Module. @@ -22,13 +22,7 @@ Depending on the configuration, this module will process the feed as follow: Note that the hash of the content is defined as the sha1(gzip64encoded). Every data coming from a named feed can be sent to a pre-processing module before going to the global module. 
-The mapping can be done via the variable feed_queue_mapping - -Requirements ------------- - -*Need running Redis instances. -*Need the ZMQ_Feed_Q Module running to be able to work properly. +The mapping can be done via the variable FEED_QUEUE_MAPPING """ import base64 @@ -44,7 +38,7 @@ from Helper import Process # CONFIG # refresh_time = 30 -feed_queue_mapping = { "feeder2": "preProcess1" } # Map a feeder name to a pre-processing module +FEED_QUEUE_MAPPING = { "feeder2": "preProcess1" } # Map a feeder name to a pre-processing module if __name__ == '__main__': publisher.port = 6380 @@ -117,8 +111,8 @@ if __name__ == '__main__': else: # New content # populate Global OR populate another set based on the feeder_name - if feeder_name in feed_queue_mapping: - p.populate_set_out(relay_message, feed_queue_mapping[feeder_name]) + if feeder_name in FEED_QUEUE_MAPPING: + p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name]) else: p.populate_set_out(relay_message, 'Mixer') @@ -139,8 +133,8 @@ if __name__ == '__main__': server.expire('HASH_'+paste_name, ttl_key) # populate Global OR populate another set based on the feeder_name - if feeder_name in feed_queue_mapping: - p.populate_set_out(relay_message, feed_queue_mapping[feeder_name]) + if feeder_name in FEED_QUEUE_MAPPING: + p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name]) else: p.populate_set_out(relay_message, 'Mixer') @@ -153,8 +147,8 @@ if __name__ == '__main__': server.expire(paste_name, ttl_key) # populate Global OR populate another set based on the feeder_name - if feeder_name in feed_queue_mapping: - p.populate_set_out(relay_message, feed_queue_mapping[feeder_name]) + if feeder_name in FEED_QUEUE_MAPPING: + p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name]) else: p.populate_set_out(relay_message, 'Mixer') diff --git a/bin/Phone.py b/bin/Phone.py index 6ad4b1b6..61000f98 100755 --- a/bin/Phone.py +++ b/bin/Phone.py @@ -1,7 +1,14 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 
-* + """ - module for finding phone numbers +The Phone Module +================ + +This module is consuming the Redis-list created by the Categ module. + +It applies phone number regexes on paste content and warns if above a threshold. + """ import time diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py index 023710c4..2efdfee5 100755 --- a/bin/RegexForTermsFrequency.py +++ b/bin/RegexForTermsFrequency.py @@ -2,6 +2,8 @@ # -*-coding:UTF-8 -* """ This Module is used for term frequency. +It processes every paste coming from the global module and tests the regexes +supplied in the term webpage. """ import redis diff --git a/bin/Release.py b/bin/Release.py index ce30ea3f..98e60a96 100755 --- a/bin/Release.py +++ b/bin/Release.py @@ -6,6 +6,11 @@ from pubsublogger import publisher from Helper import Process import re +''' +This module takes its input from the global module. +It applies some regexes and publishes matched content +''' + if __name__ == "__main__": publisher.port = 6380 publisher.channel = "Script" diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py index 1901d4b6..d2948f1b 100755 --- a/bin/SQLInjectionDetection.py +++ b/bin/SQLInjectionDetection.py @@ -1,7 +1,14 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* + """ - Sql Injection module +The SQLInjectionDetection Module +================================ + +This module is consuming the Redis-list created by the Web module. + +It tests different possibilities of SQL injection. + """ import time diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py index 8cd71305..00b15abb 100755 --- a/bin/SentimentAnalysis.py +++ b/bin/SentimentAnalysis.py @@ -4,8 +4,8 @@ Sentiment analyser module. It takes its inputs from 'global'. - The content analysed comes from the pastes with length of the line - above a defined threshold removed (get_p_content_with_removed_lines). 
+ The content is analysed if the length of the line is + above a defined threshold (get_p_content_with_removed_lines). This is done because NLTK sentences tokemnizer (sent_tokenize) seems to crash for long lines (function _slices_from_text line#1276). diff --git a/bin/SetForTermsFrequency.py b/bin/SetForTermsFrequency.py index b3100073..c4e480ff 100755 --- a/bin/SetForTermsFrequency.py +++ b/bin/SetForTermsFrequency.py @@ -2,6 +2,8 @@ # -*-coding:UTF-8 -* """ This Module is used for term frequency. +It processes every paste coming from the global module and tests the sets +supplied in the term webpage. """ import redis diff --git a/bin/Tokenize.py b/bin/Tokenize.py index 5e5c9b17..377cba5a 100755 --- a/bin/Tokenize.py +++ b/bin/Tokenize.py @@ -1,8 +1,8 @@ #!/usr/bin/env python # -*-coding:UTF-8 -* """ -The ZMQ_PubSub_Lines Module -============================ +The Tokenize Module +=================== This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q Module. diff --git a/bin/Web.py b/bin/Web.py index 0fae546d..dc2bf2fd 100755 --- a/bin/Web.py +++ b/bin/Web.py @@ -1,5 +1,14 @@ #!/usr/bin/env python # -*-coding:UTF-8 -* + +""" +The Web Module +============================ + +This module tries to parse URLs and warns if some defined country codes are present. + +""" + import redis import pprint import time diff --git a/bin/WebStats.py b/bin/WebStats.py index 4cc05b48..cbb52e7a 100755 --- a/bin/WebStats.py +++ b/bin/WebStats.py @@ -1,7 +1,13 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* + """ - Template for new modules +The WebStats Module +====================== + +This module makes stats on URLs collected from the web module. +It considers the TLD, Domain and protocol. 
+ """ import time diff --git a/bin/preProcessFeed.py b/bin/preProcessFeed.py index fe542647..d9ef419d 100755 --- a/bin/preProcessFeed.py +++ b/bin/preProcessFeed.py @@ -1,6 +1,15 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* +''' +The preProcess Module +===================== + +This module is just an example of how we can pre-process a feed coming from the Mixer +module before sending it to the Global module. + +''' + import time from pubsublogger import publisher diff --git a/doc/screenshots/sentiment.png b/doc/screenshots/sentiment.png new file mode 100644 index 00000000..d7f1dbec Binary files /dev/null and b/doc/screenshots/sentiment.png differ