Improved description of modules inside the scripts

2017-05-09 11:13:16 +02:00 · 2017-05-09 11:13:16 +02:00 · 3a4dcd691d
parent 2187c8338e
commit 3a4dcd691d
21 changed files with 120 additions and 56 deletions
--- a/README.md
+++ b/README.md
@ -100,7 +100,7 @@ Terms manager and occurence
 ![Term-Manager](./doc/screenshots/terms-manager.png?raw=true "AIL framework termManager")
-## Top terms
+### Top terms
 ![Term-Top](./doc/screenshots/terms-top.png?raw=true "AIL framework termTop")
 ![Term-Plot](./doc/screenshots/terms-plot.png?raw=true "AIL framework termPlot")
@ -108,6 +108,10 @@ Terms manager and occurence
 [AIL framework screencast](https://www.youtube.com/watch?v=1_ZrZkRKmNo)
 Command line module manager
 ---------------------------
 ![Module-Manager](./doc/screenshots/module-manager.png?raw=true "AIL framework ModuleInformationV2.py")
 License
 =======
--- a/bin/Attributes.py
+++ b/bin/Attributes.py
@ -5,25 +5,7 @@
 The ZMQ_Sub_Attribute Module
 ============================
-This module is consuming the Redis-list created by the ZMQ_PubSub_Line_Q Module
+This module saves attributes of the paste into Redis.
 It performs a sorting on the line's length and publishes/forwards them to
 different channels:
 *Channel 1 if max length(line) < max
 *Channel 2 if max length(line) > max
 The collected information about the processed pastes
 (number of lines and maximum length line) are stored in Redis.
 ..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
 the same Subscriber name in both of them.
 Requirements
 ------------
 *Need running Redis instances. (LevelDB & Redis)
 *Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
 """
 import time
--- a/bin/Credential.py
+++ b/bin/Credential.py
@ -1,5 +1,16 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
 The Credential Module
 =====================
 This module is consuming the Redis-list created by the Categ module.
 It applies credential regexes on paste content and warns if above a threshold.
 """
 import time
 import sys
 from packages import Paste
--- a/bin/CreditCards.py
+++ b/bin/CreditCards.py
@ -1,5 +1,17 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*
 """
 The CreditCards Module
 ======================
 This module is consuming the Redis-list created by the Categ module.
 It applies credit card regexes on paste content and warns if above a threshold.
 """
 import pprint
 import time
 from packages import Paste
@ -7,7 +19,6 @@ from packages import lib_refine
 from pubsublogger import publisher
 import re
 from Helper import Process
 if __name__ == "__main__":
--- a/bin/CurveManageTopSets.py
+++ b/bin/CurveManageTopSets.py
@ -5,14 +5,6 @@
 This module manages top sets for terms frequency.
 Every 'refresh_rate', it updates the weekly and monthly sets.
 Requirements
 ------------
 *Need running Redis instances. (Redis)
 *Categories files of words in /files/ need to be created
 *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
 """
 import redis
--- a/bin/Cve.py
+++ b/bin/Cve.py
@ -1,7 +1,13 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-    Template for new modules
+The CVE Module
 ======================
 This module is consuming the Redis-list created by the Categ module.
 It applies CVE regexes on paste content and warns if a reference to a CVE is spotted.
 """
 import time
--- a/bin/DomClassifier.py
+++ b/bin/DomClassifier.py
@ -5,8 +5,8 @@
 The DomClassifier Module
 ============================
-The DomClassifier modules is fetching the list of files to be
+The DomClassifier module extracts and classifies Internet domains/hostnames/IP addresses from
-processed and index each file with a full-text indexer (Whoosh until now).
+the output of the Global module.
 """
 import time
--- a/bin/Keys.py
+++ b/bin/Keys.py
@ -1,7 +1,14 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-    Template for new modules
+The Keys Module
 ======================
 This module is consuming the Redis-list created by the Global module.
 It is looking for PGP encrypted messages
 """
 import time
--- a/bin/Mail.py
+++ b/bin/Mail.py
@ -1,6 +1,16 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*
 """
 The Mail Module
 ===============
 This module is consuming the Redis-list created by the Categ module.
 It applies mail regexes on paste content and warns if above a threshold.
 """
 import redis
 import pprint
 import time
--- a/bin/Mixer.py
+++ b/bin/Mixer.py
@ -1,8 +1,8 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*
 """
-The ZMQ_Feed_Q Module
+The Mixer Module
-=====================
+================
 This module is consuming the Redis-list created by the ZMQ_Feed_Q Module.
@ -22,13 +22,7 @@ Depending on the configuration, this module will process the feed as follow:
 Note that the hash of the content is defined as the sha1(gzip64encoded).
 Every data coming from a named feed can be sent to a pre-processing module before going to the global module.
-The mapping can be done via the variable feed_queue_mapping
+The mapping can be done via the variable FEED_QUEUE_MAPPING
 Requirements
 ------------
 *Need running Redis instances.
 *Need the ZMQ_Feed_Q Module running to be able to work properly.
 """
 import base64
@ -44,7 +38,7 @@ from Helper import Process
 # CONFIG #
 refresh_time = 30
-feed_queue_mapping = { "feeder2": "preProcess1" } # Map a feeder name to a pre-processing module
+FEED_QUEUE_MAPPING = { "feeder2": "preProcess1" } # Map a feeder name to a pre-processing module
 if __name__ == '__main__':
    publisher.port = 6380
@ -117,8 +111,8 @@ if __name__ == '__main__':
                    else: # New content
                        # populate Global OR populate another set based on the feeder_name
-                        if feeder_name in feed_queue_mapping:
+                        if feeder_name in FEED_QUEUE_MAPPING:
-                            p.populate_set_out(relay_message, feed_queue_mapping[feeder_name])
+                            p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
                        else:
                            p.populate_set_out(relay_message, 'Mixer')
@ -139,8 +133,8 @@ if __name__ == '__main__':
                        server.expire('HASH_'+paste_name, ttl_key)
                        # populate Global OR populate another set based on the feeder_name
-                        if feeder_name in feed_queue_mapping:
+                        if feeder_name in FEED_QUEUE_MAPPING:
-                            p.populate_set_out(relay_message, feed_queue_mapping[feeder_name])
+                            p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
                        else:
                            p.populate_set_out(relay_message, 'Mixer')
@ -153,8 +147,8 @@ if __name__ == '__main__':
                            server.expire(paste_name, ttl_key)
                            # populate Global OR populate another set based on the feeder_name
-                            if feeder_name in feed_queue_mapping:
+                            if feeder_name in FEED_QUEUE_MAPPING:
-                                p.populate_set_out(relay_message, feed_queue_mapping[feeder_name])
+                                p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
                            else:
                                p.populate_set_out(relay_message, 'Mixer')
--- a/bin/Phone.py
+++ b/bin/Phone.py
@ -1,7 +1,14 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-    module for finding phone numbers
+The Phone Module
 ================
 This module is consuming the Redis-list created by the Categ module.
 It applies phone number regexes on paste content and warns if above a threshold.
 """
 import time
--- a/bin/RegexForTermsFrequency.py
+++ b/bin/RegexForTermsFrequency.py
@ -2,6 +2,8 @@
 # -*-coding:UTF-8 -*
 """
 This Module is used for term frequency.
 It processes every paste coming from the global module and tests the regexes
 supplied in the term webpage.
 """
 import redis
--- a/bin/Release.py
+++ b/bin/Release.py
@ -6,6 +6,11 @@ from pubsublogger import publisher
 from Helper import Process
 import re
 '''
 This module takes its input from the global module.
 It applies some regex and publishes matched content
 '''
 if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
--- a/bin/SQLInjectionDetection.py
+++ b/bin/SQLInjectionDetection.py
@ -1,7 +1,14 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-    Sql Injection module
+The SQLInjectionDetection Module
 ================================
 This module is consuming the Redis-list created by the Web module.
 It tests different possibilities to perform SQL injection.
 """
 import time
--- a/bin/SentimentAnalysis.py
+++ b/bin/SentimentAnalysis.py
@ -4,8 +4,8 @@
    Sentiment analyser module.
    It takes its inputs from 'global'.
-    The content analysed comes from the pastes with length of the line 
+    The content analysed comes from the pastes after removing the lines whose
-    above a defined threshold removed (get_p_content_with_removed_lines).
+    length is above a defined threshold (get_p_content_with_removed_lines).
    This is done because the NLTK sentence tokenizer (sent_tokenize) seems to crash
    for long lines (function _slices_from_text line#1276).
--- a/bin/SetForTermsFrequency.py
+++ b/bin/SetForTermsFrequency.py
@ -2,6 +2,8 @@
 # -*-coding:UTF-8 -*
 """
 This Module is used for term frequency.
 It processes every paste coming from the global module and tests the sets
 supplied in the term webpage.
 """
 import redis
--- a/bin/Tokenize.py
+++ b/bin/Tokenize.py
@ -1,8 +1,8 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*
 """
-The ZMQ_PubSub_Lines Module
+The Tokenize Module
-============================
+===================
 This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
 Module.
--- a/bin/Web.py
+++ b/bin/Web.py
@ -1,5 +1,14 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*
 """
 The Web Module
 ============================
 This module tries to parse URLs and warns if some defined country codes are present.
 """
 import redis
 import pprint
 import time
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@ -1,7 +1,13 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-    Template for new modules
+The WebStats Module
 ======================
 This module computes statistics on URLs collected by the Web module.
 It considers the TLD, domain and protocol.
 """
 import time
--- a/bin/preProcessFeed.py
+++ b/bin/preProcessFeed.py
@ -1,6 +1,15 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 '''
 The preProcess Module
 =====================
 This module is just an example of how we can pre-process a feed coming from the Mixer
 module before sending it to the Global module.
 '''
 import time
 from pubsublogger import publisher
--- a/doc/screenshots/sentiment.png
+++ b/doc/screenshots/sentiment.png