mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			Improved description of modules inside the scripts
							parent
							
								
									2187c8338e
								
							
						
					
					
						commit
						3a4dcd691d
					
				| 
						 | 
				
			
			@ -100,7 +100,7 @@ Terms manager and occurence
 | 
			
		|||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
## Top terms
 | 
			
		||||
### Top terms
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||

 | 
			
		||||
| 
						 | 
				
			
			@ -108,6 +108,10 @@ Terms manager and occurence
 | 
			
		|||
 | 
			
		||||
[AIL framework screencast](https://www.youtube.com/watch?v=1_ZrZkRKmNo)
 | 
			
		||||
 | 
			
		||||
Command line module manager
 | 
			
		||||
---------------------------
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
License
 | 
			
		||||
=======
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,25 +5,7 @@
 | 
			
		|||
The ZMQ_Sub_Attribute Module
 | 
			
		||||
============================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the ZMQ_PubSub_Line_Q Module
 | 
			
		||||
 | 
			
		||||
It perform a sorting on the line's length and publish/forward them to
 | 
			
		||||
differents channels:
 | 
			
		||||
 | 
			
		||||
*Channel 1 if max length(line) < max
 | 
			
		||||
*Channel 2 if max length(line) > max
 | 
			
		||||
 | 
			
		||||
The collected informations about the processed pastes
 | 
			
		||||
(number of lines and maximum length line) are stored in Redis.
 | 
			
		||||
 | 
			
		||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
 | 
			
		||||
the same Subscriber name in both of them.
 | 
			
		||||
 | 
			
		||||
Requirements
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
*Need running Redis instances. (LevelDB & Redis)
 | 
			
		||||
*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
 | 
			
		||||
This module saves the attributes of the paste into Redis.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
import time
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,16 @@
 | 
			
		|||
#!/usr/bin/env python2
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
The Credential Module
 | 
			
		||||
=====================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the Categ module.
 | 
			
		||||
 | 
			
		||||
It applies credential regexes on paste content and warns if above a threshold.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
import sys
 | 
			
		||||
from packages import Paste
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,17 @@
 | 
			
		|||
#!/usr/bin/env python
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
The CreditCards Module
 | 
			
		||||
======================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the Categ module.
 | 
			
		||||
 | 
			
		||||
It applies credit card regexes on paste content and warns if above a threshold.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
import pprint
 | 
			
		||||
import time
 | 
			
		||||
from packages import Paste
 | 
			
		||||
| 
						 | 
				
			
			@ -7,7 +19,6 @@ from packages import lib_refine
 | 
			
		|||
from pubsublogger import publisher
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from Helper import Process
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,14 +5,6 @@
 | 
			
		|||
This module manages top sets for terms frequency.
 | 
			
		||||
Every 'refresh_rate' update the weekly and monthly set
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Requirements
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
*Need running Redis instances. (Redis)
 | 
			
		||||
*Categories files of words in /files/ need to be created
 | 
			
		||||
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import redis
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,13 @@
 | 
			
		|||
#!/usr/bin/env python2
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
"""
 | 
			
		||||
    Template for new modules
 | 
			
		||||
The CVE Module
 | 
			
		||||
======================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the Categ module.
 | 
			
		||||
 | 
			
		||||
It applies CVE regexes on paste content and warns if a reference to a CVE is spotted.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,8 +5,8 @@
 | 
			
		|||
The DomClassifier Module
 | 
			
		||||
============================
 | 
			
		||||
 | 
			
		||||
The DomClassifier modules is fetching the list of files to be
 | 
			
		||||
processed and index each file with a full-text indexer (Whoosh until now).
 | 
			
		||||
The DomClassifier module extracts and classifies Internet domains/hostnames/IP addresses from
 | 
			
		||||
the output of the Global module.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
import time
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,14 @@
 | 
			
		|||
#!/usr/bin/env python2
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
    Template for new modules
 | 
			
		||||
The Keys Module
 | 
			
		||||
======================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the Global module.
 | 
			
		||||
 | 
			
		||||
It is looking for PGP encrypted messages
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										10
									
								
								bin/Mail.py
								
								
								
								
							
							
						
						
									
										10
									
								
								bin/Mail.py
								
								
								
								
							| 
						 | 
				
			
			@ -1,6 +1,16 @@
 | 
			
		|||
#!/usr/bin/env python
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
The Mail Module
 | 
			
		||||
======================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the Categ module.
 | 
			
		||||
 | 
			
		||||
It applies mail regexes on paste content and warns if above a threshold.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import redis
 | 
			
		||||
import pprint
 | 
			
		||||
import time
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										26
									
								
								bin/Mixer.py
								
								
								
								
							
							
						
						
									
										26
									
								
								bin/Mixer.py
								
								
								
								
							| 
						 | 
				
			
			@ -1,8 +1,8 @@
 | 
			
		|||
#!/usr/bin/env python
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
"""
 | 
			
		||||
The ZMQ_Feed_Q Module
 | 
			
		||||
=====================
 | 
			
		||||
The Mixer Module
 | 
			
		||||
================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the ZMQ_Feed_Q Module.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -22,13 +22,7 @@ Depending on the configuration, this module will process the feed as follow:
 | 
			
		|||
Note that the hash of the content is defined as the sha1(gzip64encoded).
 | 
			
		||||
 | 
			
		||||
Every data coming from a named feed can be sent to a pre-processing module before going to the global module.
 | 
			
		||||
The mapping can be done via the variable feed_queue_mapping
 | 
			
		||||
 | 
			
		||||
Requirements
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
*Need running Redis instances.
 | 
			
		||||
*Need the ZMQ_Feed_Q Module running to be able to work properly.
 | 
			
		||||
The mapping can be done via the variable FEED_QUEUE_MAPPING
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
import base64
 | 
			
		||||
| 
						 | 
				
			
			@ -44,7 +38,7 @@ from Helper import Process
 | 
			
		|||
 | 
			
		||||
# CONFIG #
 | 
			
		||||
refresh_time = 30
 | 
			
		||||
feed_queue_mapping = { "feeder2": "preProcess1" } # Map a feeder name to a pre-processing module
 | 
			
		||||
FEED_QUEUE_MAPPING = { "feeder2": "preProcess1" } # Map a feeder name to a pre-processing module
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    publisher.port = 6380
 | 
			
		||||
| 
						 | 
				
			
			@ -117,8 +111,8 @@ if __name__ == '__main__':
 | 
			
		|||
                    else: # New content
 | 
			
		||||
 | 
			
		||||
                        # populate Global OR populate another set based on the feeder_name
 | 
			
		||||
                        if feeder_name in feed_queue_mapping:
 | 
			
		||||
                            p.populate_set_out(relay_message, feed_queue_mapping[feeder_name])
 | 
			
		||||
                        if feeder_name in FEED_QUEUE_MAPPING:
 | 
			
		||||
                            p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
 | 
			
		||||
                        else:
 | 
			
		||||
                            p.populate_set_out(relay_message, 'Mixer')
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -139,8 +133,8 @@ if __name__ == '__main__':
 | 
			
		|||
                        server.expire('HASH_'+paste_name, ttl_key)
 | 
			
		||||
 | 
			
		||||
                        # populate Global OR populate another set based on the feeder_name
 | 
			
		||||
                        if feeder_name in feed_queue_mapping:
 | 
			
		||||
                            p.populate_set_out(relay_message, feed_queue_mapping[feeder_name])
 | 
			
		||||
                        if feeder_name in FEED_QUEUE_MAPPING:
 | 
			
		||||
                            p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
 | 
			
		||||
                        else:
 | 
			
		||||
                            p.populate_set_out(relay_message, 'Mixer')
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -153,8 +147,8 @@ if __name__ == '__main__':
 | 
			
		|||
                            server.expire(paste_name, ttl_key)
 | 
			
		||||
 | 
			
		||||
                            # populate Global OR populate another set based on the feeder_name
 | 
			
		||||
                            if feeder_name in feed_queue_mapping:
 | 
			
		||||
                                p.populate_set_out(relay_message, feed_queue_mapping[feeder_name])
 | 
			
		||||
                            if feeder_name in FEED_QUEUE_MAPPING:
 | 
			
		||||
                                p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
 | 
			
		||||
                            else:
 | 
			
		||||
                                p.populate_set_out(relay_message, 'Mixer')
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,14 @@
 | 
			
		|||
#!/usr/bin/env python2
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
    module for finding phone numbers
 | 
			
		||||
The Phone Module
 | 
			
		||||
================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the Categ module.
 | 
			
		||||
 | 
			
		||||
It applies phone number regexes on paste content and warns if above a threshold.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,6 +2,8 @@
 | 
			
		|||
# -*-coding:UTF-8 -*
 | 
			
		||||
"""
 | 
			
		||||
This Module is used for term frequency.
 | 
			
		||||
It processes every paste coming from the global module and tests the regexes
 | 
			
		||||
supplied in the term webpage.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
import redis
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6,6 +6,11 @@ from pubsublogger import publisher
 | 
			
		|||
from Helper import Process
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
'''
 | 
			
		||||
This module takes its input from the global module.
 | 
			
		||||
It applies some regexes and publishes the matched content.
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    publisher.port = 6380
 | 
			
		||||
    publisher.channel = "Script"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,14 @@
 | 
			
		|||
#!/usr/bin/env python2
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
    Sql Injection module
 | 
			
		||||
The SQLInjectionDetection Module
 | 
			
		||||
================================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the Web module.
 | 
			
		||||
 | 
			
		||||
It tests different possibilities of performing an SQL injection.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,8 +4,8 @@
 | 
			
		|||
    Sentiment analyser module.
 | 
			
		||||
    It takes its inputs from 'global'.
 | 
			
		||||
 | 
			
		||||
    The content analysed comes from the pastes with length of the line 
 | 
			
		||||
    above a defined threshold removed (get_p_content_with_removed_lines).
 | 
			
		||||
    The content is analysed if the length of the line is
 | 
			
		||||
    above a defined threshold (get_p_content_with_removed_lines).
 | 
			
		||||
    This is done because the NLTK sentence tokenizer (sent_tokenize) seems to crash
 | 
			
		||||
    for long lines (function _slices_from_text line#1276).
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,6 +2,8 @@
 | 
			
		|||
# -*-coding:UTF-8 -*
 | 
			
		||||
"""
 | 
			
		||||
This Module is used for term frequency.
 | 
			
		||||
It processes every paste coming from the global module and tests the sets
 | 
			
		||||
supplied in the term webpage.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
import redis
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,8 +1,8 @@
 | 
			
		|||
#!/usr/bin/env python
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
"""
 | 
			
		||||
The ZMQ_PubSub_Lines Module
 | 
			
		||||
============================
 | 
			
		||||
The Tokenize Module
 | 
			
		||||
===================
 | 
			
		||||
 | 
			
		||||
This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
 | 
			
		||||
Module.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,14 @@
 | 
			
		|||
#!/usr/bin/env python
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
The Web Module
 | 
			
		||||
============================
 | 
			
		||||
 | 
			
		||||
This module tries to parse URLs and warns if some defined country codes are present.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import redis
 | 
			
		||||
import pprint
 | 
			
		||||
import time
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,13 @@
 | 
			
		|||
#!/usr/bin/env python2
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
    Template for new modules
 | 
			
		||||
The WebStats Module
 | 
			
		||||
======================
 | 
			
		||||
 | 
			
		||||
This module computes statistics on the URLs collected from the Web module.
 | 
			
		||||
It considers the TLD, domain and protocol.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,15 @@
 | 
			
		|||
#!/usr/bin/env python2
 | 
			
		||||
# -*-coding:UTF-8 -*
 | 
			
		||||
 | 
			
		||||
'''
 | 
			
		||||
The preProcess Module
 | 
			
		||||
=====================
 | 
			
		||||
 | 
			
		||||
This module is just an example of how we can pre-process a feed coming from the Mixer
 | 
			
		||||
module before sending it to the Global module.
 | 
			
		||||
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
from pubsublogger import publisher
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 51 KiB  | 
		Loading…
	
		Reference in New Issue