From 727bc5596691da467d9585e205a2a7d183c64506 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Thu, 30 Jan 2020 11:31:33 +0100
Subject: [PATCH 01/15] fix: [Tag core] check if item_date type is an integer

---
 bin/packages/Tag.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py
index b8394b34..03b9c990 100755
--- a/bin/packages/Tag.py
+++ b/bin/packages/Tag.py
@@ -392,6 +392,11 @@ def add_tag(object_type, tag, object_id, obj_date=None):
     # new tag
     if not is_obj_tagged(object_id, tag):
         # # TODO: # FIXME: sanityze object_type
+        if obj_date:
+            try:
+                obj_date = int(obj_date)
+            except:
+                obj_date = None
         if not obj_date:
             obj_date = get_obj_date(object_type, object_id)
         add_global_tag(tag, object_type=object_type)

From e19a3b3e630ed8cacd492e5c36ffa59c3cdfac78 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 3 Feb 2020 09:37:08 +0100
Subject: [PATCH 02/15] fix: [Domain explorer UI] fix daterange pagination
 links

---
 .../crawler_splash/domain_explorer.html |   3 +
 .../templates/domains/domain_explorer.html | 239 ------------------
 2 files changed, 3 insertions(+), 239 deletions(-)
 delete mode 100644 var/www/templates/domains/domain_explorer.html

diff --git a/var/www/templates/crawler/crawler_splash/domain_explorer.html b/var/www/templates/crawler/crawler_splash/domain_explorer.html
index 007da8f0..16595014 100644
--- a/var/www/templates/crawler/crawler_splash/domain_explorer.html
+++ b/var/www/templates/crawler/crawler_splash/domain_explorer.html
@@ -83,6 +83,9 @@
       {%else%}
         {% set target_url=url_for('crawler_splash.domains_explorer_web') + "?domain_type=regular" %}
       {%endif%}
+      {%if 'date_from' in dict_data %}
+        {% set target_url = target_url + '&date_from=' + dict_data['date_from'] + '&date_to=' + dict_data['date_to'] %}
+      {%endif%}
       {% include 'pagination.html' %}
     {% endwith %}
   {%endif%}

diff --git a/var/www/templates/domains/domain_explorer.html b/var/www/templates/domains/domain_explorer.html
deleted file mode 100644
index ec6211c7..00000000
--- a/var/www/templates/domains/domain_explorer.html
+++ /dev/null
@@ -1,239 +0,0 @@
-
-
-
-    Show Domain - AIL
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    {% include 'nav_bar.html' %}
-
-
-
-      {% include 'crawler/menu_sidebar.html' %}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        {% for dict_domain in dict_data['list_elem'] %}
-
-          {% if loop.index0 % 4 == 0 %}
-
-          {% endif %}
-
-
-
-
-
-
-
-                {{dict_domain["id"]}}
-
-
-
-
-                First seen: {{dict_domain["first_seen"]}}
-                Last_seen: {{dict_domain["first_seen"]}}
-                Ports: {{dict_domain["ports"]}}
-
-
-              Status:
-              {% if dict_domain["status"] %}
-
-                  UP
-
-              {% else %}
-
-                  DOWN
-
-              {% endif %}
-
-              {% for tag in dict_domain['tags'] %}
-
-                  {{ tag }}
-
-              {% endfor %}
-
-
-
-
-          {% if loop.index0 % 4 == 3 %}
-
-          {% endif %}
-
-        {% endfor %}
-
-        {% if dict_data['list_elem']|length % 4 != 0 %}
-
-        {% endif %}
-
-
-
-    {%if 'list_elem' in dict_data%}
-      {% with page=dict_data['page'], nb_page_max=dict_data['nb_pages'], nb_first_elem=dict_data['nb_first_elem'], nb_last_elem=dict_data['nb_last_elem'], nb_all_elem=dict_data['nb_all_elem'] %}
-        {% set object_name="domain" %}
-        {%if domain_type=='onion'%}
-          {% set target_url=url_for('crawler_splash.domains_explorer_onion') + "?domain_type=onion" %}
-        {%else%}
-          {% set target_url=url_for('crawler_splash.domains_explorer_web') + "?domain_type=regular" %}
-        {%endif%}
-        {% include 'pagination.html' %}
-      {% endwith %}
-    {%endif%}
-
-
-
-
-
-
-
-
-
-
-
-
-

From e808840f957c810b8e3944cba808716dc722581b Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 3 Feb 2020 10:32:20 +0100
Subject: [PATCH 03/15] fix: [Global: filename provided by all feeders] avoid
 path traversal

---
 bin/Global.py | 54 +++++++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 23 deletions(-)

diff --git a/bin/Global.py b/bin/Global.py
index 00207c63..b0419bea 100755
--- a/bin/Global.py
+++ b/bin/Global.py
@@ -45,8 +45,10 @@ if __name__ == '__main__':

     p = Process(config_section)

+    # get and sanityze PASTE DIRECTORY
     PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
     PASTES_FOLDERS = PASTES_FOLDER + '/'
+    PASTES_FOLDERS = os.path.join(os.path.realpath(PASTES_FOLDERS), '')

     # LOGGING #
     publisher.info("Feed Script started to receive & publish.")
@@ -75,6 +77,10 @@ if __name__ == '__main__':
                 time.sleep(1)
                 continue

+            # remove PASTES_FOLDER from item path (crawled item + submited)
+            if PASTES_FOLDERS in paste:
+                paste = paste.replace(PASTES_FOLDERS, '', 1)
+
             file_name_paste = paste.split('/')[-1]
             if len(file_name_paste)>255:
                 new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
@@ -82,33 +88,35 @@ if __name__ == '__main__':

             # Creating the full filepath
             filename = os.path.join(PASTES_FOLDER, paste)
+            filename = os.path.realpath(filename)

-            dirname = os.path.dirname(filename)
-            if not os.path.exists(dirname):
-                os.makedirs(dirname)
+            # incorrect filename
+            if not os.path.commonprefix([filename, PASTES_FOLDER]) == PASTES_FOLDER:
+                print('Path traversal detected {}'.format(filename))
+                publisher.warning('Global; Path traversal detected')
+            else:
+                dirname = os.path.dirname(filename)
+                if not os.path.exists(dirname):
+                    os.makedirs(dirname)

-            decoded = base64.standard_b64decode(gzip64encoded)
+                decoded = base64.standard_b64decode(gzip64encoded)

-            with open(filename, 'wb') as f:
-                f.write(decoded)
-            '''try:
-                decoded2 = gunzip_bytes_obj(decoded)
-            except:
-                decoded2 =''
+                with open(filename, 'wb') as f:
+                    f.write(decoded)
+                '''try:
+                    decoded2 = gunzip_bytes_obj(decoded)
+                except:
+                    decoded2 =''

-            type = magic.from_buffer(decoded2, mime=True)
+                type = magic.from_buffer(decoded2, mime=True)

-            if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby':
+                if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby':

-                print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
-                print(filename)
-                print(type)
-                print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
-            '''
+                    print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
+                    print(filename)
+                    print(type)
+                    print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
+                '''

-    # remove PASTES_FOLDER from item path (crawled item + submited)
-    if PASTES_FOLDERS in paste:
-        paste = paste.replace(PASTES_FOLDERS, '', 1)
-
-    p.populate_set_out(paste)
-    processed_paste+=1
+            p.populate_set_out(paste)
+            processed_paste+=1

From 4d8db3fcc4757cad99ed3bbea057e5080abaffa8 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 3 Feb 2020 14:51:51 +0100
Subject: [PATCH 04/15] fix: [Global: already saved filename] save updated +
 filter duplicated items

---
 bin/Global.py | 71 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/bin/Global.py b/bin/Global.py
index b0419bea..7ef3c78e 100755
--- a/bin/Global.py
+++ b/bin/Global.py
@@ -21,14 +21,24 @@ Requirements

 """
 import base64
+import hashlib
+import io
+import gzip
 import os
+import sys
 import time
 import uuid

 from pubsublogger import publisher

 from Helper import Process

-import magic
+def gunzip_bytes_obj(bytes_obj):
+    in_ = io.BytesIO()
+    in_.write(bytes_obj)
+    in_.seek(0)
+    with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
+        gunzipped_bytes_obj = fo.read()
+    return gunzipped_bytes_obj

 def rreplace(s, old, new, occurrence):
     li = s.rsplit(old, occurrence)
@@ -67,9 +77,9 @@ if __name__ == '__main__':
                 publisher.debug("Empty Paste: {0} not processed".format(message))
                 continue
             else:
-                print("Empty Queues: Waiting...")
+                #print("Empty Queues: Waiting...")
                 if int(time.time() - time_1) > 30:
-                    to_print = 'Global; ; ; ;glob Processed {0} paste(s)'.format(processed_paste)
+                    to_print = 'Global; ; ; ;glob Processed {0} paste(s) in {1} s'.format(processed_paste, time.time() - time_1)
                     print(to_print)
                     #publisher.info(to_print)
                     time_1 = time.time()
@@ -95,28 +105,53 @@ if __name__ == '__main__':
                 print('Path traversal detected {}'.format(filename))
                 publisher.warning('Global; Path traversal detected')
             else:
+
+                # decode compressed base64
+                decoded = base64.standard_b64decode(gzip64encoded)
+
+                # check if file exist
+                if os.path.isfile(filename):
+                    print('File already exist {}'.format(filename))
+                    publisher.warning('Global; File already exist')
+
+                    with gzip.open(filename, 'rb') as f:
+                        curr_file_content = f.read()
+                    curr_file_md5 = hashlib.md5(curr_file_content).hexdigest()
+
+                    new_file_content = gunzip_bytes_obj(decoded)
+                    new_file_md5 = hashlib.md5(new_file_content).hexdigest()
+
+                    if new_file_md5 != curr_file_md5:
+
+                        if filename.endswith('.gz'):
+                            filename = '{}_{}.gz'.format(filename[:-3], new_file_md5)
+                        else:
+                            filename = '{}_{}'.format(filename, new_file_md5)
+
+                        # continue if new file already exist
+                        if os.path.isfile(filename):
+                            print('ignore duplicated file')
+                            continue
+
+                        print('new file: {}'.format(filename))
+                    # ignore duplicate
+                    else:
+                        print('ignore duplicated file')
+                        continue
+
+                # create subdir
                 dirname = os.path.dirname(filename)
                 if not os.path.exists(dirname):
                     os.makedirs(dirname)

-                decoded = base64.standard_b64decode(gzip64encoded)
-
                 with open(filename, 'wb') as f:
                     f.write(decoded)
-                '''try:
-                    decoded2 = gunzip_bytes_obj(decoded)
-                except:
-                    decoded2 =''
-
-                type = magic.from_buffer(decoded2, mime=True)
-
-                if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby':
-
-                    print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
-                    print(filename)
-                    print(type)
-                    print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
-                '''
+                paste = filename
+                # remove PASTES_FOLDER from
+                if PASTES_FOLDERS in paste:
+                    paste = paste.replace(PASTES_FOLDERS, '', 1)

                 p.populate_set_out(paste)
                 processed_paste+=1

From 8770bf05d732afe3a48cb03cd2b463c73bd8c061 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 3 Feb 2020 15:29:37 +0100
Subject: [PATCH 05/15] fix: [IPAddress] catch empty config error

---
 bin/IPAddress.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/bin/IPAddress.py b/bin/IPAddress.py
index f03ee8b3..e45a4ce6 100755
--- a/bin/IPAddress.py
+++ b/bin/IPAddress.py
@@ -6,7 +6,7 @@ The IP Module

 This module is consuming the global channel.

-It first performs a regex to find IP addresses and then matches those IPs to 
+It first performs a regex to find IP addresses and then matches those IPs to
 some configured ip ranges.

 The list of IP ranges are expected to be in CIDR format (e.g. 192.168.0.0/16)
 and should be defined in the config.cfg file, under the [IP] section
@@ -16,6 +16,7 @@

 import time
 import re
+import sys
 from pubsublogger import publisher
 from packages import Paste
 from Helper import Process
@@ -60,8 +61,12 @@ if __name__ == '__main__':
     p = Process(config_section)

     ip_networks = []
-    for network in p.config.get("IP", "networks").split(","):
-        ip_networks.append(IPv4Network(network))
+    try:
+        for network in p.config.get("IP", "networks").split(","):
+            ip_networks.append(IPv4Network(network))
+    except:
+        print('Please provide a list of valid IP addresses')
+        sys.exit(0)

     # Sent to the logging a description of the module
@@ -78,4 +83,3 @@ if __name__ == '__main__':

         # Do something with the message from the queue
         search_ip(message)
-

From f422be917af56f757b3f6df138ab08eecf103bc5 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Tue, 4 Feb 2020 09:29:53 +0100
Subject: [PATCH 06/15] fix: [domain explorer web] fix pagination by daterange

---
 var/www/blueprints/crawler_splash.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index ee2e68ba..696b2d6e 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -154,5 +154,5 @@ def domains_explorer_web():
         except:
             page = 1

-    dict_data = Domain.domains_up_by_page('regular', page=page)
+    dict_data = Domain.domains_up_by_page('regular', page=page, date_from=date_from, date_to=date_to)
     return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')

From 6b6c6a4d9e66bbc01db76ecc1261be88bd414aa7 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Tue, 4 Feb 2020 09:34:05 +0100
Subject: [PATCH 07/15] fix: [domain explorer web] fix pagination by daterange

---
 var/www/blueprints/crawler_splash.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index 696b2d6e..7acecfbe 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -154,5 +154,5 @@ def domains_explorer_web():
         except:
             page = 1

-    dict_data = Domain.domains_up_by_page('regular', page=page, date_from=date_from, date_to=date_to)
+    dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
     return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')

From 7888f5490232a937c8f03d467c640e72325307cd Mon Sep 17 00:00:00 2001
From: Mike Peters
Date: Thu, 6 Feb 2020 09:18:54 +0100
Subject: [PATCH 08/15] Added support for username as notification credentials

---
 bin/NotificationHelper.py | 6 +++++-
 configs/core.cfg.sample   | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py
index 02568a1e..b9c90104 100755
--- a/bin/NotificationHelper.py
+++ b/bin/NotificationHelper.py
@@ -26,6 +26,7 @@ publisher.channel = "Script"
 def sendEmailNotification(recipient, alert_name, content):

     sender = config_loader.get_config_str("Notifications", "sender")
+    sender_user = config_loader.get_config_str("Notifications", "sender_user")
     sender_host = config_loader.get_config_str("Notifications", "sender_host")
     sender_port = config_loader.get_config_int("Notifications", "sender_port")
     sender_pw = config_loader.get_config_str("Notifications", "sender_pw")
@@ -49,7 +50,10 @@ def sendEmailNotification(recipient, alert_name, content):
             smtp_server = smtplib.SMTP_SSL(sender_host, sender_port)

             smtp_server.ehlo()
-            smtp_server.login(sender, sender_pw)
+            if sender_user is not None:
+                smtp_server.login(sender_user, sender_pw)
+            else:
+                smtp_server.login(sender, sender_pw)
         else:
             smtp_server = smtplib.SMTP(sender_host, sender_port)

diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample
index 632e1d07..f5d88d4e 100644
--- a/configs/core.cfg.sample
+++ b/configs/core.cfg.sample
@@ -28,6 +28,9 @@ sender = sender@example.com
 sender_host = smtp.example.com
 sender_port = 1337
 sender_pw = None
+# Only needed when the credentials for email server needs a username instead of an email address
+#sender_user = sender
+sender_user =

 # optional for using with authenticated SMTP over SSL
 # sender_pw = securepassword

From fc58940ed67f8b8b6c55f1b7ccc44c9da2d7b561 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Thu, 6 Feb 2020 09:41:43 +0100
Subject: [PATCH 09/15] fix: [core Global] catch and log incomplete files

---
 bin/Global.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/bin/Global.py b/bin/Global.py
index 7ef3c78e..9a6b37e2 100755
--- a/bin/Global.py
+++ b/bin/Global.py
@@ -114,9 +114,14 @@ if __name__ == '__main__':
                     print('File already exist {}'.format(filename))
                     publisher.warning('Global; File already exist')

+                    try:
+                        with gzip.open(filename, 'rb') as f:
+                            curr_file_content = f.read()
+                    except EOFError:
+                        publisher.warning('Global; Incomplete file: {}'.format(filename))
+                        # discard item
+                        continue

-                    with gzip.open(filename, 'rb') as f:
-                        curr_file_content = f.read()
                     curr_file_md5 = hashlib.md5(curr_file_content).hexdigest()

                     new_file_content = gunzip_bytes_obj(decoded)

From 62ce4646e5b605893eec81df22c902e7d1269073 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Fri, 7 Feb 2020 10:12:38 +0100
Subject: [PATCH 10/15] fix: [search items tag] fix daterange

---
 bin/packages/Tag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py
index 03b9c990..bc5a290d 100755
--- a/bin/packages/Tag.py
+++ b/bin/packages/Tag.py
@@ -451,7 +451,7 @@ def delete_obj_tags(object_id, object_type, tags=[]):
     return res

 def sanitise_tags_date_range(l_tags, date_from=None, date_to=None):
-    if date_from or date_to is None:
+    if date_from is None or date_to is None:
         date_from = get_tags_min_last_seen(l_tags, r_int=False)
         date_to = date_from
     return Date.sanitise_date_range(date_from, date_to)

From d8fbd72863f56f33de5f70bc450e115861623866 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Fri, 7 Feb 2020 10:53:45 +0100
Subject: [PATCH 11/15] fix: [Global catch incomplete file error] fix #464

---
 bin/Global.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/bin/Global.py b/bin/Global.py
index 9a6b37e2..3bf36215 100755
--- a/bin/Global.py
+++ b/bin/Global.py
@@ -28,10 +28,21 @@ import os
 import sys
 import time
 import uuid
+
+import datetime
+import redis
+
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
+import ConfigLoader

 from pubsublogger import publisher

 from Helper import Process

+config_loader = ConfigLoader.ConfigLoader()
+r_stats = config_loader.get_redis_conn("ARDB_Statistics")
+config_loader = None
+
 def gunzip_bytes_obj(bytes_obj):
     in_ = io.BytesIO()
     in_.write(bytes_obj)
@@ -119,6 +130,14 @@ if __name__ == '__main__':
                             curr_file_content = f.read()
                     except EOFError:
                         publisher.warning('Global; Incomplete file: {}'.format(filename))
+                        # save daily stats
+                        r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
                         # discard item
                         continue
+                    except OSError:
+                        publisher.warning('Global; Not a gzipped file: {}'.format(filename))
+                        # save daily stats
+                        r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
+                        # discard item
+                        continue

                     curr_file_md5 = hashlib.md5(curr_file_content).hexdigest()

From f9856a1589cb49fc6e407587d7609bf4a3e6af23 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 10 Feb 2020 10:31:53 +0100
Subject: [PATCH 12/15] fix: [module Webstats + BankAccount-Decoder] fix faup
 return type + remove old Paste library from BankAccount-Decoder #465

---
 bin/BankAccount.py   | 24 ++++++++++++------------
 bin/Decoder.py       | 38 ++++++++++++++++++--------------------
 bin/WebStats.py      |  5 ++++-
 bin/packages/Item.py |  3 +++
 4 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/bin/BankAccount.py b/bin/BankAccount.py
index 16a8a11f..d3cb4c16 100755
--- a/bin/BankAccount.py
+++ b/bin/BankAccount.py
@@ -5,7 +5,7 @@
 The BankAccount Module
 ======================

-It apply IBAN regexes on paste content and warn if above a threshold.
+It apply IBAN regexes on item content and warn if above a threshold.
""" @@ -17,7 +17,7 @@ import re import string from itertools import chain -from packages import Paste +from packages import Item from pubsublogger import publisher from Helper import Process @@ -49,7 +49,7 @@ def is_valid_iban(iban): return True return False -def check_all_iban(l_iban, paste, filename): +def check_all_iban(l_iban, obj_id): nb_valid_iban = 0 for iban in l_iban: iban = iban[0]+iban[1]+iban[2] @@ -65,14 +65,14 @@ def check_all_iban(l_iban, paste, filename): server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1) if(nb_valid_iban > 0): - to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) + to_print = 'Iban;{};{};{};'.format(Item.get_source(obj_id), Item.get_item_date(obj_id), Item.get_basename(obj_id)) publisher.warning('{}Checked found {} IBAN;{}'.format( - to_print, nb_valid_iban, paste.p_rel_path)) - msg = 'infoleak:automatic-detection="iban";{}'.format(filename) + to_print, nb_valid_iban, obj_id)) + msg = 'infoleak:automatic-detection="iban";{}'.format(obj_id) p.populate_set_out(msg, 'Tags') #Send to duplicate - p.populate_set_out(filename, 'Duplicate') + p.populate_set_out(obj_id, 'Duplicate') if __name__ == "__main__": publisher.port = 6380 @@ -103,21 +103,21 @@ if __name__ == "__main__": if message is not None: - filename = message - paste = Paste.Paste(filename) - content = paste.get_p_content() + obj_id = Item.get_item_id(message) + + content = Item.get_item_content(obj_id) signal.alarm(max_execution_time) try: l_iban = iban_regex.findall(content) except TimeoutException: - print ("{0} processing timeout".format(paste.p_rel_path)) + print ("{0} processing timeout".format(obj_id)) continue else: signal.alarm(0) if(len(l_iban) > 0): - check_all_iban(l_iban, paste, filename) + check_all_iban(l_iban, obj_id) else: publisher.debug("Script BankAccount is Idling 10s") diff --git a/bin/Decoder.py b/bin/Decoder.py index 82133de7..9ea3adcc 100755 --- a/bin/Decoder.py +++ b/bin/Decoder.py @@ -17,7 +17,6 @@ import datetime from pubsublogger import publisher from Helper import Process -from packages import Paste from packages import Item import re @@ -50,11 +49,11 @@ def decode_string(content, message, date, encoded_list, decoder_name, encoded_mi save_hash(decoder_name, message, date, decode) - #remove encoded from paste content + #remove encoded from item content content = content.replace(encoded, '', 1) if(find): - set_out_paste(decoder_name, message) + set_out_item(decoder_name, message) return content @@ -72,8 +71,8 @@ def save_hash(decoder_name, message, date, decoded): data['estimated type'] = type json_data = json.dumps(data) - date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8]) - date_key = date[0:4] + date[4:6] + date[6:8] + date_item = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8]) + date_key = date serv_metadata.incrby(decoder_name+'_decoded:'+date_key, 1) serv_metadata.zincrby('hash_date:'+date_key, hash, 1) @@ -81,24 +80,24 @@ def save_hash(decoder_name, message, date, decoded): # first time we see this hash if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'): - serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste) - serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) + serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_item) + serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_item) else: - serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) + serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_item) - # first time we see this 
+    # first time we see this hash (all encoding) on this item
     if serv_metadata.zscore('nb_seen_hash:'+hash, message) is None:
         serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1)
-        serv_metadata.sadd('hash_paste:'+message, hash) # paste - hash map
+        serv_metadata.sadd('hash_paste:'+message, hash) # item - hash map

         # create hash metadata
         serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type)
         serv_metadata.sadd('hash_all_type', type)

-    # first time we see this hash encoding on this paste
+    # first time we see this hash encoding on this item
     if serv_metadata.zscore(decoder_name+'_hash:'+hash, message) is None:
         print('first '+decoder_name)

-        serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # paste - hash map
+        serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # item - hash map

         # create hash metadata
         serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type)
@@ -118,8 +117,8 @@ def save_hash(decoder_name, message, date, decoded):

     serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)

-    serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
-    serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
+    serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - item map
+    serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this item

     # Domain Object
     if Item.is_crawled(message):
@@ -150,7 +149,7 @@ def save_hash_on_disk(decode, type, hash, json_data):
     with open(filename_json, 'w') as f:
         f.write(json_data)

-def set_out_paste(decoder_name, message):
+def set_out_item(decoder_name, message):
     publisher.warning(decoder_name+' decoded')
     #Send to duplicate
     p.populate_set_out(message, 'Duplicate')
@@ -217,12 +216,11 @@ if __name__ == '__main__':
             time.sleep(1)
             continue

-        filename = message
-        paste = Paste.Paste(filename)
+        obj_id = Item.get_item_id(message)

         # Do something with the message from the queue
-        content = paste.get_p_content()
-        date = str(paste._get_p_date())
+        content = Item.get_item_content(obj_id)
+        date = Item.get_item_date(obj_id)

         for decoder in decoder_order: # add threshold and size limit
@@ -233,7 +231,7 @@ if __name__ == '__main__':
             except TimeoutException:
                 encoded_list = []
                 p.incr_module_timeout_statistic() # add encoder type
-                print ("{0} processing timeout".format(paste.p_rel_path))
+                print ("{0} processing timeout".format(obj_id))
                 continue
             else:
                 signal.alarm(0)

diff --git a/bin/WebStats.py b/bin/WebStats.py
index 7eecb0d2..10aba917 100755
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@@ -29,7 +29,10 @@ num_day_to_look = 5 # the detection of the progression start num_day_to_lo
 def analyse(server, field_name, date, url_parsed):
     field = url_parsed[field_name]
     if field is not None:
-        field = field.decode('utf8')
+        try: # faup version
+            field = field.decode()
+        except:
+            pass
         server.hincrby(field, date, 1)
         if field_name == "domain": #save domain in a set for the monthly plot
             domain_set_name = "domain_set_" + date[0:6]

diff --git a/bin/packages/Item.py b/bin/packages/Item.py
index b1722209..e9fcd18b 100755
--- a/bin/packages/Item.py
+++ b/bin/packages/Item.py
@@ -32,6 +32,9 @@ def exist_item(item_id):
     else:
         return False

+def get_basename(item_id):
+    return os.path.basename(item_id)
+
 def get_item_id(full_path):
     return full_path.replace(PASTES_FOLDER, '', 1)

From cf24c59e1d390db39b305b1aec7a17d3b0551b0f Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 10 Feb 2020 10:44:06 +0100
Subject: [PATCH 13/15] fix: [IPAddress] remove leading zeros #465

---
 bin/IPAddress.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bin/IPAddress.py b/bin/IPAddress.py
index e45a4ce6..e2ed59cf 100755
--- a/bin/IPAddress.py
+++ b/bin/IPAddress.py
@@ -32,8 +32,9 @@ def search_ip(message):
     results = reg_ip.findall(content)

     matching_ips = []
-    for res in results:
-        address = IPv4Address(res)
+    for ip in results:
+        ip = '.'.join([str(int(x)) for x in ip.split('.')])
+        address = IPv4Address(ip)
         for network in ip_networks:
             if address in network:
                 matching_ips.append(address)

From 4097d95237a81e3c506743397eea4880622a1ccf Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 10 Feb 2020 11:04:24 +0100
Subject: [PATCH 14/15] fix: [SentimentAnalysis] download nltk punkt

---
 bin/SentimentAnalysis.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py
index fc9a2f4c..eb27d408 100755
--- a/bin/SentimentAnalysis.py
+++ b/bin/SentimentAnalysis.py
@@ -30,7 +30,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 import ConfigLoader

 from nltk.sentiment.vader import SentimentIntensityAnalyzer
-from nltk import tokenize
+from nltk import tokenize, download

 # Config Variables
 accepted_Mime_type = ['text/plain']
@@ -62,7 +62,12 @@ def Analyse(message, server):
     combined_datetime = datetime.datetime.combine(the_date, the_time)
     timestamp = calendar.timegm(combined_datetime.timetuple())

-    sentences = tokenize.sent_tokenize(p_content)
+    try:
+        sentences = tokenize.sent_tokenize(p_content)
+    except Exception as e:
+        # use the NLTK Downloader to obtain the resource
+        download('punkt')
+        sentences = tokenize.sent_tokenize(p_content)

     if len(sentences) > 0:
         avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}

From f950e700bd26900988de32a576655d725a7ff698 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Mon, 10 Feb 2020 11:06:39 +0100
Subject: [PATCH 15/15] chg: [SentimentAnalysis] clean

---
 bin/SentimentAnalysis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py
index eb27d408..a90a3a09 100755
--- a/bin/SentimentAnalysis.py
+++ b/bin/SentimentAnalysis.py
@@ -64,7 +64,7 @@ def Analyse(message, server):

     try:
         sentences = tokenize.sent_tokenize(p_content)
-    except Exception as e:
+    except:
         # use the NLTK Downloader to obtain the resource
         download('punkt')
         sentences = tokenize.sent_tokenize(p_content)
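
A note on the Global.py changes (PATCH 03, 04, 09 and 11): the module now resolves
every candidate path with os.path.realpath() and only writes an item when the
resolved path is still prefixed by the pastes root, and a name collision is settled
by suffixing the new file with the md5 of its decompressed content. A minimal
standalone sketch of both ideas, with a hypothetical root directory chosen here for
illustration (AIL reads the real root from the [Directories] pastes entry of its
config):

    import hashlib
    import os

    # hypothetical root; realpath() also normalizes the root itself
    PASTES_FOLDER = os.path.join(os.path.realpath('PASTES'), '')

    def resolve_inside_root(item_path):
        # realpath() collapses '..' components and symlinks, so a crafted
        # path such as '../../etc/passwd' resolves outside the root
        filename = os.path.realpath(os.path.join(PASTES_FOLDER, item_path))
        if os.path.commonprefix([filename, PASTES_FOLDER]) != PASTES_FOLDER:
            return None  # path traversal detected
        return filename

    def deduplicated_name(filename, new_md5):
        # keep the '.gz' suffix when a colliding item has different content
        if filename.endswith('.gz'):
            return '{}_{}.gz'.format(filename[:-3], new_md5)
        return '{}_{}'.format(filename, new_md5)

    print(resolve_inside_root('2020/02/03/item.gz'))  # stays inside the root
    print(resolve_inside_root('../../etc/passwd'))    # traversal -> None
    print(deduplicated_name('item.gz', hashlib.md5(b'new content').hexdigest()))

This sketch keeps the trailing separator on the root before the commonprefix
comparison, which is slightly stricter than the patched module: it also rejects a
sibling directory whose name merely starts with the root's name.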
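
A note on PATCH 13: the stdlib ipaddress module refuses dotted quads whose octets
carry leading zeros (IPv4Address('010.0.0.1') raises a ValueError on the Python 3
versions current at the time of this series), which is why each octet captured by
the regex is normalized with int() before the address object is built. A small
sketch of the same normalization, using an example network in place of the ranges
normally read from the [IP] section of config.cfg:

    from ipaddress import IPv4Address, IPv4Network

    networks = [IPv4Network('192.168.0.0/16')]  # example range

    def normalize_ipv4(raw):
        # '192.168.001.010' -> '192.168.1.10'
        return '.'.join(str(int(octet)) for octet in raw.split('.'))

    for raw in ('192.168.001.010', '10.0.0.1'):
        address = IPv4Address(normalize_ipv4(raw))
        print(address, any(address in net for net in networks))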