Merge pull request #389 from CIRCL/term_trackerv1

Term trackerv1
Alexandre Dulaunoy 2019-09-12 14:06:05 +02:00 committed by GitHub
commit 35dcf428ff
No known key found for this signature in database
39 changed files with 2335 additions and 1536 deletions

View File

@ -109,8 +109,81 @@ Redis and ARDB overview
| **uuid**:ltags | **tag** |
| **uuid**:ltagsgalaxies | **tag** |
## DB2 - Leak Hunter:
##### Tracker metadata:
| Hset - Key | Field | Value |
| ------ | ------ | ------ |
| tracker:**uuid** | tracked | **tracked word/set/regex** |
| | type | **word/set/regex** |
| | date | **date added** |
| | user_id | **created by user_id** |
| | dashboard | **0/1 Display alert on dashboard** |
| | level | **0/1 Tracker visibility** |
##### Tracker by user_id (visibility level: user only):
| Set - Key | Value |
| ------ | ------ |
| user:tracker:**user_id** | **uuid - tracker uuid** |
| user:tracker:**user_id**:**word/set/regex - tracker type** | **uuid - tracker uuid** |
##### Global Tracker (visibility level: all users):
| Set - Key | Value |
| ------ | ------ |
| global:tracker | **uuid - tracker uuid** |
| global:tracker:**word/set/regex - tracker type** | **uuid - tracker uuid** |
##### All Tracker by type:
| Set - Key | Value |
| ------ | ------ |
| all:tracker:**word/set/regex - tracker type** | **tracked item** |
| Set - Key | Value |
| ------ | ------ |
| all:tracker_uuid:**tracker type**:**tracked item** | **uuid - tracker uuid** |
##### All Tracked items:
| Set - Key | Value |
| ------ | ------ |
| tracker:item:**uuid**:**date** | **item_id** |
##### All Tracked tags:
| Set - Key | Value |
| ------ | ------ |
| tracker:tags:**uuid** | **tag** |
##### All Tracked mail:
| Set - Key | Value |
| ------ | ------ |
| tracker:mail:**uuid** | **mail** |
##### Refresh Tracker:
| Key | Value |
| ------ | ------ |
| tracker:refresh:word | **last refreshed epoch** |
| tracker:refresh:set | - |
| tracker:refresh:regex | - |
##### Zset Stat Tracker:
| Key | Field | Value |
| ------ | ------ | ------ |
| tracker:stat:**uuid** | **date** | **nb_seen** |
##### Stat token:
| Key | Field | Value |
| ------ | ------ | ------ |
| stat_token_total_by_day:**date** | **word** | **nb_seen** |
| | | |
| stat_token_per_item_by_day:**date** | **word** | **nb_seen** |
| Set - Key | Value |
| ------ | ------ |
| stat_token_history | **date** |
## DB2 - TermFreq:
##### Set:
##### Set:
| Key | Value |
| ------ | ------ |
@ -118,6 +191,7 @@ Redis and ARDB overview
| TrackedSetSet | **tracked_set** |
| TrackedRegexSet | **tracked_regex** |
| | |
| | |
| tracked_**tracked_term** | **item_path** |
| set_**tracked_set** | **item_path** |
| regex_**tracked_regex** | **item_path** |

View File

@ -1,184 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
This module updates a .csv file used to draw curves representing selected
words and their occurrences per day.
..note:: The channel will have the name of the file created.
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.
This Module is also used for term frequency.
/!\ Top set management is done in the module Curve_manage_top_set
*Need running Redis instances. (Redis)
*Categories files of words in /files/ need to be created
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
import redis
import time
from pubsublogger import publisher
from packages import lib_words
import os
import datetime
import calendar
from Helper import Process
# Email notifications
from NotificationHelper import *
# Config Variables
BlackListTermsSet_Name = "BlackListSetTermSet"
TrackedTermsSet_Name = "TrackedSetTermSet"
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
oneDay = 60*60*24
top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month]
TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_"
# create direct link in mail
full_paste_url = "/showsavedpaste/?paste="
def check_if_tracked_term(term, path):
# Handle one occurrence of `term` found in the paste located at `path`.
# term: the word to look up in the tracked-terms set.
# path: the paste path recorded in Redis and appended to mails/tag messages.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the notification and tagging sections below should be confirmed.
# Membership test against the Redis set named by TrackedTermsSet_Name.
if term in server_term.smembers(TrackedTermsSet_Name):
#add_paste to tracked_word_set
set_name = "tracked_" + term
server_term.sadd(set_name, path)
print(term, 'addded', set_name, '->', path)
# Push a message to the 'CurveManageTopSets' output queue.
p.populate_set_out("New Term added", 'CurveManageTopSets')
# Send a notification only when the member is in the set
# NOTE(review): TrackedTermsNotificationEnabled_Name is not defined in this
# module; presumably it comes from the `from NotificationHelper import *` import.
if term in server_term.smembers(TrackedTermsNotificationEnabled_Name):
# create mail body
mail_body = ("AIL Framework,\n"
"New occurrence for term: " + term + "\n"
''+full_paste_url + path)
# Send to every associated email address
for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + term):
sendEmailNotification(email, 'Term', mail_body)
# tag paste
# Each tag stored for this term is emitted as "<tag>;<path>" on the 'Tags' queue.
for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + term):
msg = '{};{}'.format(tag, path)
p.populate_set_out(msg, 'Tags')
def getValueOverRange(word, startDate, num_day):
    """Sum the stored counters of `word` over `num_day` daily timestamps.

    Starting at `startDate` and stepping backwards by `oneDay` seconds, the
    hash stored under each timestamp is queried for the field `word`.
    Missing fields contribute nothing to the total.
    """
    first_excluded = startDate - num_day * oneDay
    daily_values = (
        server_term.hget(day_ts, word)
        for day_ts in range(startDate, first_excluded, -oneDay)
    )
    return sum(int(raw) for raw in daily_values if raw is not None)
if __name__ == "__main__":
publisher.port = 6380 = "Script"
config_section = 'Curve'
p = Process(config_section)
r_serv1 = redis.StrictRedis(
host=p.config.get("ARDB_Curve", "host"),
port=p.config.get("ARDB_Curve", "port"),
db=p.config.get("ARDB_Curve", "db"),
server_term = redis.StrictRedis(
host=p.config.get("ARDB_TermFreq", "host"),
port=p.config.get("ARDB_TermFreq", "port"),
db=p.config.get("ARDB_TermFreq", "db"),
# FUNCTIONS #"Script Curve started")
# create direct link in mail
full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url
csv_path = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "wordtrending_csv"))
wordfile_path = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "wordsfile"))
message = p.get_from_set()
prec_filename = None
generate_new_graph = False
# Term Frequency
top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
while True:
if message is not None:
generate_new_graph = True
filename, word, score = message.split()
temp = filename.split('/')
date = temp[-4] + temp[-3] + temp[-2]
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
curr_set = top_termFreq_setName_day[0] + str(timestamp)
low_word = word.lower()
#Old curve with words in file
r_serv1.hincrby(low_word, date, int(score))
# Update redis
#consider the num of occurence of this term
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
#1 term per paste
curr_word_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), low_word, int(1)))
# Add in set only if term is not in the blacklist
if low_word not in server_term.smembers(BlackListTermsSet_Name):
#consider the num of occurence of this term
server_term.zincrby(curr_set, low_word, float(score))
#1 term per paste
server_term.zincrby("per_paste_" + curr_set, low_word, float(1))
#Add more info for tracked terms
check_if_tracked_term(low_word, filename)
#send to RegexForTermsFrequency
to_send = "{} {} {}".format(filename, timestamp, word)
p.populate_set_out(to_send, 'RegexForTermsFrequency')
if generate_new_graph:
generate_new_graph = False
print('Building graph')
today =
year = today.year
month = today.month
lib_words.create_curve_with_word_file(r_serv1, csv_path,
wordfile_path, year,
publisher.debug("Script Curve is Idling")
message = p.get_from_set()

View File

@ -1,166 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
This module manages the top sets for term frequency.
Every 'refresh_rate' seconds, it updates the weekly and monthly sets.
import redis
import time
import datetime
import copy
from pubsublogger import publisher
from packages import lib_words
import datetime
import calendar
import os
import configparser
# Config Variables
Refresh_rate = 60*5 #sec
BlackListTermsSet_Name = "BlackListSetTermSet"
TrackedTermsSet_Name = "TrackedSetTermSet"
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
oneDay = 60*60*24
num_day_month = 31
num_day_week = 7
top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month]
def manage_top_set():
startDate =
startDate = startDate.replace(hour=0, minute=0, second=0, microsecond=0)
startDate = calendar.timegm(startDate.timetuple())
blacklist_size = int(server_term.scard(BlackListTermsSet_Name))
dico = {}
dico_per_paste = {}
# Retreive top data (max_card + blacklist_size) from days sets
for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay):
curr_set = top_termFreq_setName_day[0] + str(timestamp)
array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size)
array_top_day_per_paste = server_term.zrevrangebyscore("per_paste_" + curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size)
for word, value in array_top_day:
if word not in server_term.smembers(BlackListTermsSet_Name):
if word in dico.keys():
dico[word] += value
dico[word] = value
for word, value in array_top_day_per_paste:
if word not in server_term.smembers(BlackListTermsSet_Name):
if word in dico_per_paste.keys():
dico_per_paste[word] += value
dico_per_paste[word] = value
if timestamp == startDate - num_day_week*oneDay:
dico_week = copy.deepcopy(dico)
dico_week_per_paste = copy.deepcopy(dico_per_paste)
# convert dico into sorted array
array_month = []
for w, v in dico.items():
array_month.append((w, v))
array_month.sort(key=lambda tup: -tup[1])
array_month = array_month[0:20]
array_week = []
for w, v in dico_week.items():
array_week.append((w, v))
array_week.sort(key=lambda tup: -tup[1])
array_week = array_week[0:20]
# convert dico_per_paste into sorted array
array_month_per_paste = []
for w, v in dico_per_paste.items():
array_month_per_paste.append((w, v))
array_month_per_paste.sort(key=lambda tup: -tup[1])
array_month_per_paste = array_month_per_paste[0:20]
array_week_per_paste = []
for w, v in dico_week_per_paste.items():
array_week_per_paste.append((w, v))
array_week_per_paste.sort(key=lambda tup: -tup[1])
array_week_per_paste = array_week_per_paste[0:20]
# suppress every terms in top sets
for curr_set, curr_num_day in top_termFreq_set_array[1:3]:
for w in server_term.zrange(curr_set, 0, -1):
server_term.zrem(curr_set, w)
for w in server_term.zrange("per_paste_" + curr_set, 0, -1):
server_term.zrem("per_paste_" + curr_set, w)
# Add top term from sorted array in their respective sorted sets
for elem in array_week:
server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0])
for elem in array_week_per_paste:
server_term.zadd("per_paste_" + top_termFreq_setName_week[0], float(elem[1]), elem[0])
for elem in array_month:
server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0])
for elem in array_month_per_paste:
server_term.zadd("per_paste_" + top_termFreq_setName_month[0], float(elem[1]), elem[0])
timestamp = int(time.mktime(
value = str(timestamp) + ", " + "-"
r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value)
print("refreshed module")
if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see
# Port of the redis instance used by pubsublogger
publisher.port = 6380
# Script is the default channel used for the modules. = 'Script'
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = configparser.ConfigParser()
# For Module Manager
r_temp = redis.StrictRedis(
host=cfg.get('RedisPubSub', 'host'),
port=cfg.getint('RedisPubSub', 'port'),
db=cfg.getint('RedisPubSub', 'db'),
timestamp = int(time.mktime(
value = str(timestamp) + ", " + "-"
r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value)
r_temp.sadd("MODULE_TYPE_"+ "CurveManageTopSets" , str(os.getpid()))
server_term = redis.StrictRedis(
host=cfg.get("ARDB_TermFreq", "host"),
port=cfg.getint("ARDB_TermFreq", "port"),
db=cfg.getint("ARDB_TermFreq", "db"),
decode_responses=True)"Script Curve_manage_top_set started")
# Sent to the logging a description of the module"Manage the top sets with the data created by the module curve.")
while True:
# Get one message from the input queue (module only work if linked with a queue)
time.sleep(Refresh_rate) # sleep a long time then manage the set

bin/ Executable file
View File

@ -0,0 +1,57 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
The DbCleaner Module
import os
import sys
import time
import datetime
from pubsublogger import publisher
import NotificationHelper
from packages import Date
from packages import Item
from packages import Term
def clean_term_db_stat_token():
all_stat_date = Term.get_all_token_stat_history()
list_date_to_keep = Date.get_date_range(31)
for date in all_stat_date:
if date not in list_date_to_keep:
# remove history
print('Term Stats Cleaned')
if __name__ == "__main__":
publisher.port = 6380 = "Script""DbCleaner started")
# low priority
daily_cleaner = True
current_date ="%Y%m%d")
while True:
if daily_cleaner:
daily_cleaner = False
new_date ="%Y%m%d")
if new_date != current_date:
current_date = new_date
daily_cleaner = True

View File

@ -1,48 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import argparse
import redis
from pubsublogger import publisher
from packages.lib_words import create_dirfile
import configparser
def main():
"""Main Function"""
cfg = configparser.ConfigParser()'./packages/config.cfg')
parser = argparse.ArgumentParser(
description='''This script is a part of the Analysis Information Leak
framework. It create a redis list called "listfile" which contain
the absolute filename of all the files from the directory given in
the argument "directory".''',
epilog='Example: ./ /home/2013/03/')
parser.add_argument('directory', type=str,
help='The directory to run inside', action='store')
parser.add_argument('-db', type=int, default=0,
help='The name of the Redis DB (default 0)',
choices=[0, 1, 2, 3, 4], action='store')
parser.add_argument('-ow', help='trigger the overwritting mode',
args = parser.parse_args()
r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"),
publisher.port = 6380 = "Script"
create_dirfile(r_serv,, args.ow)
if __name__ == "__main__":

View File

@ -76,12 +76,15 @@ function helptext {
[-l | --launchAuto]
[-k | --killAll]
[-u | --update]
[-c | --configUpdate]
[-t | --thirdpartyUpdate]
[-h | --help]
[-l | --launchAuto] LAUNCH DB + Scripts
[-k | --killAll] Kill DB + Scripts
[-ks | --killscript] Scripts
[-u | --update] Update AIL
[-c | --crawler] LAUNCH Crawlers
[-f | --launchFeeder] LAUNCH Pystemon feeder
[-t | --thirdpartyUpdate] Update Web
[-m | --menu] Display Advanced Menu
[-h | --help] Help
@ -153,14 +156,10 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Duplicates" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Lines" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Categ" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Tokenize" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "CreditCards" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "BankAccount" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
@ -175,13 +174,9 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Credential" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Curve" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
screen -S "Script_AIL" -X screen -t "TermTrackerMod" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "CurveManageTopSets" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "RegexForTermsFrequency" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "SetForTermsFrequency" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
screen -S "Script_AIL" -X screen -t "RegexTracker" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Indexer" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
@ -213,6 +208,8 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "UpdateBackground" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./; read x"
@ -404,6 +401,18 @@ function launch_feeder {
function killscript {
if [[ $islogged || $isqueued || $isscripted || $isflasked || $isfeeded || $iscrawler ]]; then
echo -e $GREEN"Killing Script"$DEFAULT
kill $islogged $isqueued $isscripted $isflasked $isfeeded $iscrawler
sleep 0.2
echo -e $ROSE`screen -ls`$DEFAULT
echo -e $GREEN"\t* $islogged $isqueued $isscripted $isflasked $isfeeded $iscrawler killed."$DEFAULT
echo -e $RED"\t* No script to kill"$DEFAULT
function killall {
if [[ $isredis || $isardb || $islogged || $isqueued || $isscripted || $isflasked || $isfeeded || $iscrawler ]]; then
if [[ $isredis ]]; then
@ -463,10 +472,7 @@ function launch_all {
#If no params, display the menu
[[ $@ ]] || {
function menu_display {
options=("Redis" "Ardb" "Logs" "Queues" "Scripts" "Flask" "Killall" "Shutdown" "Update" "Update-config" "Update-thirdparty")
@ -479,6 +485,7 @@ function launch_all {
prompt="Check an option (again to uncheck, ENTER when done): "
while menu && read -rp "$prompt" numinput && [[ "$numinput" ]]; do
for num in $numinput; do
[[ "$num" != *[![:digit:]]* ]] && (( num > 0 && num <= ${#options[@]} )) || {
@ -533,6 +540,14 @@ function launch_all {
#If no params, display the help
[[ $@ ]] || {
#echo "$@"
@ -553,6 +568,10 @@ while [ "$1" != "" ]; do
-k | --killAll ) killall;
-ks | --killscript ) killscript;
-m | --menu ) menu_display;
-u | --update ) update;
-t | --thirdpartyUpdate ) update_thirdparty;
@ -565,7 +584,6 @@ while [ "$1" != "" ]; do
-kh | --khelp ) helptext;
* ) helptext
exit 1

View File

@ -1,85 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
The ZMQ_PubSub_Lines Module
This module is consuming the Redis-list created by the ZMQ_PubSub_Line_Q
It sorts pastes by their lines' length and publishes/forwards them to
different channels:
*Channel 1 if max length(line) < max
*Channel 2 if max length(line) > max
The collected information about the processed pastes
(number of lines and maximum line length) is stored in Redis.
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.
*Need running Redis instances. (LevelDB & Redis)
*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
import argparse
import time
from packages import Paste
from pubsublogger import publisher
from Helper import Process
if __name__ == '__main__':
publisher.port = 6380 = 'Script'
config_section = 'Lines'
p = Process(config_section)
parser = argparse.ArgumentParser(
description='This script is a part of the Analysis Information \
Leak framework.')
'-max', type=int, default=500,
help='The limit between "short lines" and "long lines"',
args = parser.parse_args()
tmp_string = "Lines script Subscribed to channel {} and Start to publish \
on channel Longlines, Shortlines"
while True:
message = p.get_from_set()
if message is not None:
PST = Paste.Paste(message)
publisher.debug("Tokeniser is idling 10s")
# FIXME do it in the paste class
lines_infos = PST.get_lines_info()
PST.save_attribute_redis("p_nb_lines", lines_infos[0])
PST.save_attribute_redis("p_max_length_line", lines_infos[1])
# FIXME Not used."Pastes_Objects", PST.p_rel_path)
if lines_infos[1] < args.max:
p.populate_set_out( PST.p_rel_path , 'LinesShort')
p.populate_set_out( PST.p_rel_path , 'LinesLong')
except IOError:
print("CRC Checksum Error on : ", PST.p_rel_path)

View File

@ -9,7 +9,6 @@ import time
import datetime
import redis
import os
from packages import lib_words
from packages.Date import Date
from pubsublogger import publisher
from Helper import Process

View File

@ -20,13 +20,6 @@ configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
publisher.port = 6380 = "Script"
# notifications enabled/disabled
TrackedTermsNotificationEnabled_Name = "TrackedNotifications"
# associated notification email addresses for a specific term`
# Keys will be e.g. TrackedNotificationEmails<TERMNAME>
TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_"
def sendEmailNotification(recipient, alert_name, content):
if not os.path.exists(configfile):

View File

@ -1,157 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
This Module is used for term frequency.
It processes every paste coming from the global module and test the regexs
supplied in the term webpage.
import redis
import time
from pubsublogger import publisher
from packages import Paste
import calendar
import re
import signal
import time
from Helper import Process
# Email notifications
from NotificationHelper import *
class TimeoutException(Exception):
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
# Config Variables
BlackListTermsSet_Name = "BlackListSetTermSet"
TrackedTermsSet_Name = "TrackedSetTermSet"
TrackedRegexSet_Name = "TrackedRegexSet"
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
oneDay = 60*60*24
top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month]
TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_"
# create direct link in mail
full_paste_url = "/showsavedpaste/?paste="
def refresh_dicos():
    """Rebuild the regex lookup tables from the tracked-regex Redis set.

    Every member of the set named by TrackedRegexSet_Name has its first and
    last character dropped to obtain the bare pattern.

    Returns a tuple of two dicts keyed by that bare pattern:
    the compiled regex, and the original member string as stored in Redis.
    """
    compiled_by_pattern = {}
    stored_name_by_pattern = {}
    for stored_name in server_term.smembers(TrackedRegexSet_Name):
        # Drop the first and last character of the stored member.
        pattern = stored_name[1:-1]
        compiled_by_pattern[pattern] = re.compile(pattern)
        stored_name_by_pattern[pattern] = stored_name
    return compiled_by_pattern, stored_name_by_pattern
if __name__ == "__main__":
publisher.port = 6380 = "Script"
config_section = 'RegexForTermsFrequency'
p = Process(config_section)
max_execution_time = p.config.getint(config_section, "max_execution_time")
server_term = redis.StrictRedis(
host=p.config.get("ARDB_TermFreq", "host"),
port=p.config.get("ARDB_TermFreq", "port"),
db=p.config.get("ARDB_TermFreq", "db"),
# FUNCTIONS #"RegexForTermsFrequency script started")
# create direct link in mail
full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url
# compile the regex
dico_refresh_cooldown = time.time()
dico_regex, dico_regexname_to_redis = refresh_dicos()
message = p.get_from_set()
# Regex Frequency
while True:
if message is not None:
if time.time() - dico_refresh_cooldown > DICO_REFRESH_TIME:
dico_refresh_cooldown = time.time()
dico_regex, dico_regexname_to_redis = refresh_dicos()
print('dico got refreshed')
filename = message
temp = filename.split('/')
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
curr_set = top_termFreq_setName_day[0] + str(timestamp)
paste = Paste.Paste(filename)
content = paste.get_p_content()
# iterate the word with the regex
for regex_str, compiled_regex in dico_regex.items():
matched =
except TimeoutException:
print ("{0} processing timeout".format(paste.p_rel_path))
if matched is not None: # there is a match
print('regex matched {}'.format(regex_str))
matched =
regex_str_complete = "/" + regex_str + "/"
# Add in Regex track set only if term is not in the blacklist
if regex_str_complete not in server_term.smembers(BlackListTermsSet_Name):
# Send a notification only when the member is in the set
if regex_str_complete in server_term.smembers(TrackedTermsNotificationEnabled_Name):
# create mail body
mail_body = ("AIL Framework,\n"
"New occurrence for regex: " + regex_str + "\n"
''+full_paste_url + filename)
# Send to every associated email adress
for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + regex_str_complete):
sendEmailNotification(email, 'Term', mail_body)
# tag paste
for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + regex_str_complete):
msg = '{};{}'.format(tag, filename)
p.populate_set_out(msg, 'Tags')
set_name = 'regex_' + dico_regexname_to_redis[regex_str]
new_to_the_set = server_term.sadd(set_name, filename)
new_to_the_set = True if new_to_the_set == 1 else False
# consider the num of occurence of this term
regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1)))
# 1 term per paste
if new_to_the_set:
regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1)))
server_term.zincrby("per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1))
server_term.zincrby(curr_set, dico_regexname_to_redis[regex_str], float(1))
publisher.debug("Script RegexForTermsFrequency is Idling")
message = p.get_from_set()

bin/ Executable file
View File

@ -0,0 +1,96 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
This Module is used for regex tracking.
It processes every paste coming from the global module and tests the regexes
supplied in the term webpage.
import os
import re
import sys
import time
import signal
from Helper import Process
from pubsublogger import publisher
import NotificationHelper
from packages import Item
from packages import Term
full_item_url = "/showsavedpaste/?paste="
mail_body_template = "AIL Framework,\nNew occurrence for term tracked regex: {}\nitem id: {}\nurl: {}{}"
dict_regex_tracked = Term.get_regex_tracked_words_dict()
last_refresh = time.time()
class TimeoutException(Exception):
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
def new_term_found(term, term_type, item_id, item_date):
    """Register a tracker hit and fan out the tags and mail notifications.

    term: the tracked term (here, the regex) that matched.
    term_type: tracker type used to look up the tracker uuids. The visible
        caller passes 'regex'; the previous body ignored this parameter and
        hard-coded 'regex'.
    item_id: id of the item in which the term was found.
    item_date: date of the item, stored alongside the tracked item.
    """
    uuid_list = Term.get_term_uuid_list(term, term_type)
    print('new tracked term found: {} in {}'.format(term, item_id))

    for term_uuid in uuid_list:
        Term.add_tracked_item(term_uuid, item_id, item_date)

        # Each tag attached to this tracker is emitted as "<tag>;<item_id>"
        # on the 'Tags' output queue.
        for tag in Term.get_term_tags(term_uuid):
            msg = '{};{}'.format(tag, item_id)
            p.populate_set_out(msg, 'Tags')

        mail_to_notify = Term.get_term_mails(term_uuid)
        if mail_to_notify:
            # Build the body once per tracker, then send it to every address.
            mail_body = mail_body_template.format(term, item_id, full_item_url, item_id)
            for mail in mail_to_notify:
                NotificationHelper.sendEmailNotification(mail, 'Term Tracker', mail_body)
if __name__ == "__main__":
publisher.port = 6380 = "Script""Script RegexTracker started")
config_section = 'RegexTracker'
p = Process(config_section)
max_execution_time = p.config.getint(config_section, "max_execution_time")
ull_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
# Regex Frequency
while True:
item_id = p.get_from_set()
if item_id is not None:
item_date = Item.get_item_date(item_id)
item_content = Item.get_item_content(item_id)
for regex in dict_regex_tracked:
matched = dict_regex_tracked[regex].search(item_content)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_rel_path))
if matched:
new_term_found(regex, 'regex', item_id, item_date)
# refresh Tracked term
if last_refresh < Term.get_tracked_term_last_updated_by_type('regex'):
dict_regex_tracked = Term.get_regex_tracked_words_dict()
last_refresh = time.time()
print('Tracked set refreshed')

View File

@ -1,151 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
This Module is used for term frequency.
It processes every paste coming from the global module and test the sets
supplied in the term webpage.
import redis
import time
from pubsublogger import publisher
from packages import lib_words
from packages import Paste
import os
import datetime
import calendar
import re
import ast
from Helper import Process
# Email notifications
from NotificationHelper import *
# Config Variables
BlackListTermsSet_Name = "BlackListSetTermSet"
TrackedTermsSet_Name = "TrackedSetTermSet"
TrackedRegexSet_Name = "TrackedRegexSet"
TrackedSetSet_Name = "TrackedSetSet"
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
oneDay = 60*60*24
top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month]
TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_"
# create direct link in mail
full_paste_url = "/showsavedpaste/?paste="
def add_quote_inside_tab(tab):
    """Turn a bracketed, comma-separated string into a quoted list literal.

    The first and last characters of `tab` (the enclosing brackets) are
    dropped, the remainder is split on ',' and each element is stripped of
    surrounding whitespace. The result is the string form of the resulting
    list, e.g. "[a, b,[50]]" -> "['a', 'b', '[50]']".

    Rendering through str(list) escapes quotes and backslashes inside
    elements, so the output can always be parsed back with
    ast.literal_eval and is identical to str() of the parsed list. The
    previous hand-rolled quoting produced an invalid literal for an element
    such as "it's".
    """
    elements = [elem.strip() for elem in tab[1:-1].split(',')]
    return str(elements)
if __name__ == "__main__":
publisher.port = 6380 = "Script"
config_section = 'SetForTermsFrequency'
p = Process(config_section)
server_term = redis.StrictRedis(
host=p.config.get("ARDB_TermFreq", "host"),
port=p.config.get("ARDB_TermFreq", "port"),
db=p.config.get("ARDB_TermFreq", "db"),
# FUNCTIONS #"RegexForTermsFrequency script started")
# create direct link in mail
full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url
#get the dico and matching percent
dico_percent = {}
dico_set_tab = {}
dico_setname_to_redis = {}
for set_str in server_term.smembers(TrackedSetSet_Name):
tab_set = set_str[1:-1]
tab_set = add_quote_inside_tab(tab_set)
perc_finder = re.compile("\[[0-9]{1,3}\]").search(tab_set)
if perc_finder is not None:
match_percent =[1:-1]
dico_percent[tab_set] = float(match_percent)
dico_set_tab[tab_set] = ast.literal_eval(tab_set)
dico_setname_to_redis[tab_set] = set_str
message = p.get_from_set()
while True:
if message is not None:
filename = message
temp = filename.split('/')
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
content = Paste.Paste(filename).get_p_content()
curr_set = top_termFreq_setName_day[0] + str(timestamp)
#iterate over the words of the file
match_dico = {}
for word in content.split():
for cur_set, array_set in dico_set_tab.items():
for w_set in array_set[:-1]: #avoid the percent matching
if word == w_set:
match_dico[str(array_set)] += 1
except KeyError:
match_dico[str(array_set)] = 1
#compute matching %
for the_set, matchingNum in match_dico.items():
eff_percent = float(matchingNum) / float((len(ast.literal_eval(the_set))-1)) * 100 #-1 bc if the percent matching
if eff_percent >= dico_percent[the_set]:
# Send a notification only when the member is in the set
if dico_setname_to_redis[str(the_set)] in server_term.smembers(TrackedTermsNotificationEnabled_Name):
# create mail body
mail_body = ("AIL Framework,\n"
"New occurrence for term: " + dico_setname_to_redis[str(the_set)] + "\n"
''+full_paste_url + filename)
# Send to every associated email adress
for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + dico_setname_to_redis[str(the_set)]):
sendEmailNotification(email, 'Term', mail_body)
# tag paste
for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + dico_setname_to_redis[str(the_set)]):
msg = '{};{}'.format(tag, filename)
p.populate_set_out(msg, 'Tags')
print(the_set, "matched in", filename)
set_name = 'set_' + dico_setname_to_redis[the_set]
new_to_the_set = server_term.sadd(set_name, filename)
new_to_the_set = True if new_to_the_set == 1 else False
#consider the num of occurence of this set
set_value = int(server_term.hincrby(timestamp, dico_setname_to_redis[the_set], int(1)))
# FIXME - avoid using per paste as a set is checked over the entire paste
#1 term per paste
if new_to_the_set:
set_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_setname_to_redis[the_set], int(1)))
server_term.zincrby("per_paste_" + curr_set, dico_setname_to_redis[the_set], float(1))
server_term.zincrby(curr_set, dico_setname_to_redis[the_set], float(1))
publisher.debug("Script RegexForTermsFrequency is Idling")
message = p.get_from_set()

bin/ Executable file
View File

@ -0,0 +1,124 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
The TermTracker Module
import os
import sys
import time
import signal
from Helper import Process
from pubsublogger import publisher
import NotificationHelper
from packages import Item
from packages import Term
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config
full_item_url = "/showsavedpaste/?paste="
mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}"
# loads tracked words
list_tracked_words = Term.get_tracked_words_list()
last_refresh_word = time.time()
set_tracked_words_list = Term.get_set_tracked_words_list()
last_refresh_set = time.time()
class TimeoutException(Exception):
    """Raised by ``timeout_handler`` when the registered signal fires."""


def timeout_handler(signum, frame):
    """Signal handler that aborts the current work with TimeoutException.

    ``signum`` and ``frame`` are the standard signal-handler arguments; they
    are not used.
    """
    raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
def new_term_found(term, term_type, item_id, item_date):
    """Record a match of a tracked term in an item and notify about it.

    For every tracker uuid attached to ``term``/``term_type``: store the item
    under the tracker, push one ``tag;item_id`` message per configured tag to
    the 'Tags' queue, and mail every configured address.
    """
    tracker_uuids = Term.get_term_uuid_list(term, term_type)
    print('new tracked term found: {} in {}'.format(term, item_id))

    for tracker_uuid in tracker_uuids:
        Term.add_tracked_item(tracker_uuid, item_id, item_date)

        # tag the item
        for tag in Term.get_term_tags(tracker_uuid):
            p.populate_set_out('{};{}'.format(tag, item_id), 'Tags')

        # mail notifications
        recipients = Term.get_term_mails(tracker_uuid)
        if not recipients:
            continue
        mail_body = mail_body_template.format(term, item_id, full_item_url, item_id)
        for recipient in recipients:
            NotificationHelper.sendEmailNotification(recipient, 'Term Tracker', mail_body)
if __name__ == "__main__":
publisher.port = 6380 = "Script""Script TermTrackerMod started")
config_section = 'TermTrackerMod'
p = Process(config_section)
max_execution_time = p.config.getint(config_section, "max_execution_time")
full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
while True:
item_id = p.get_from_set()
if item_id is not None:
item_date = Item.get_item_date(item_id)
item_content = Item.get_item_content(item_id)
dict_words_freq = Term.get_text_word_frequency(item_content)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_rel_path))
# create token statistics
for word in dict_words_freq:
Term.create_token_statistics(item_date, word, dict_words_freq[word])
# check solo words
for word in list_tracked_words:
if word in dict_words_freq:
new_term_found(word, 'word', item_id, item_date)
# check words set
for elem in set_tracked_words_list:
list_words = elem[0]
nb_words_threshold = elem[1]
word_set = elem[2]
nb_uniq_word = 0
for word in list_words:
if word in dict_words_freq:
nb_uniq_word += 1
if nb_uniq_word >= nb_words_threshold:
new_term_found(word_set, 'set', item_id, item_date)
# refresh Tracked term
if last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'):
list_tracked_words = Term.get_tracked_words_list()
last_refresh_word = time.time()
print('Tracked word refreshed')
if last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'):
set_tracked_words_list = Term.get_set_tracked_words_list()
last_refresh_set = time.time()
print('Tracked set refreshed')

View File

@ -1,71 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
The Tokenize Module
This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
It tokenize the content of the paste and publish the result in the following
channel_name+' '+/path/of/the/paste.gz+' '+tokenized_word+' '+scoring
..seealso:: Paste method (_get_top_words)
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.
*Need running Redis instances. (Redis)
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
import time
from packages import Paste
from pubsublogger import publisher
from Helper import Process
import signal
class TimeoutException(Exception):
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
if __name__ == "__main__":
publisher.port = 6380 = "Script"
config_section = 'Tokenize'
p = Process(config_section)
# LOGGING #"Tokeniser started")
while True:
message = p.get_from_set()
if message is not None:
paste = Paste.Paste(message)
for word, score in paste._get_top_words().items():
if len(word) >= 4:
msg = '{} {} {}'.format(paste.p_rel_path, word, score)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_rel_path))
publisher.debug("Tokeniser is idling 10s")

View File

@ -1,5 +1,7 @@
import datetime
class Date(object):
"""docstring for Date"""
def __init__(self, *args):
@ -34,7 +36,6 @@ class Date(object): = day
def substract_day(self, numDay):
import datetime
computed_date =, int(self.month), int( - datetime.timedelta(numDay)
comp_year = str(computed_date.year)
comp_month = str(computed_date.month).zfill(2)
@ -50,3 +51,22 @@ def date_substract_day(date, num_day=1):
new_date =[0:4]), int(date[4:6]), int(date[6:8])) - datetime.timedelta(num_day)
new_date = str(new_date).replace('-', '')
return new_date
def get_date_range(num_day):
curr_date =
date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(
date_list = []
for i in range(0, num_day+1):
return list(reversed(date_list))
def substract_date(date_from, date_to):
    """Return every day between two dates, both bounds included.

    :param date_from: first day, ``YYYYMMDD`` string
    :param date_to: last day, ``YYYYMMDD`` string
    :return: list of ``YYYYMMDD`` strings in chronological order; empty when
        ``date_to`` is earlier than ``date_from``
    :raises ValueError: if a string does not describe a valid calendar date
    """
    first_day = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    last_day = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    nb_days = (last_day - first_day).days  # timedelta -> whole days
    return [(first_day + datetime.timedelta(i)).strftime('%Y%m%d')
            for i in range(nb_days + 1)]

View File

@ -2,10 +2,13 @@
# -*-coding:UTF-8 -*
import os
import sys
import gzip
import redis
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Date
import Tag
@ -19,6 +22,9 @@ def exist_item(item_id):
return False
def get_item_id(full_path):
    """Return ``full_path`` with the first occurrence of PASTES_FOLDER removed."""
    item_id = full_path.replace(PASTES_FOLDER, '', 1)
    return item_id
def get_item_date(item_id):
    """Build the item date from the item path.

    The three path components preceding the file name are concatenated, so
    ``.../2019/09/12/name.gz`` gives ``'20190912'``.
    """
    parts = item_id.split('/')
    return parts[-4] + parts[-3] + parts[-2]
@ -131,3 +137,13 @@ def get_item_pgp_name(item_id):
def get_item_pgp_mail(item_id):
return _get_item_correlation('pgpdump', 'mail', item_id)
### GET Internal Module DESC
def get_item_list_desc(list_item_id):
    """Describe each item id as a dict with its 'id', 'date' and 'tags'."""
    return [
        {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)}
        for item_id in list_item_id
    ]

View File

@ -82,7 +82,7 @@ def get_item_tags(item_id):
if tags:
return list(tags)
return '[]'
return []
def add_items_tag(tags=[], galaxy_tags=[], item_id=None):

bin/packages/ Executable file
View File

@ -0,0 +1,483 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import re
import sys
import time
import uuid
import redis
import datetime
from collections import defaultdict
from nltk.tokenize import RegexpTokenizer
from textblob import TextBlob
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config
import Date
import Item
r_serv_term = Flask_config.r_serv_term
email_regex = Flask_config.email_regex
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
# NLTK tokenizer
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
gaps=True, discard_empty=True)
def is_valid_uuid_v4(UUID):
    """Return True when ``UUID`` is the string form of a version-4 UUID.

    Dashes are ignored. The value is re-parsed with ``version=4`` forced, so
    any input whose version/variant bits differ (or whose hex digits are not
    lowercase) does not compare equal and is rejected.

    :param UUID: candidate uuid string (with or without dashes)
    :return: bool; False for malformed or non-string input
    """
    try:
        UUID = UUID.replace('-', '')
        uuid_test = uuid.UUID(hex=UUID, version=4)
    except (AttributeError, TypeError, ValueError):
        # not a string, or not 32 hexadecimal digits
        return False
    return uuid_test.hex == UUID
# # TODO: use new package => duplicate fct
def is_in_role(user_id, role):
    """Tell whether ``user_id`` is a member of the ``user_role:<role>`` set."""
    # NOTE(review): r_serv_db is not defined in the visible part of this
    # module -- confirm it is in scope.
    role_key = 'user_role:{}'.format(role)
    return bool(r_serv_db.sismember(role_key, user_id))
def check_term_uuid_valid_access(term_uuid, user_id):
    """Check that ``term_uuid`` exists and that ``user_id`` may access it.

    :return: ``None`` when access is granted, otherwise an
        ``(error_dict, http_status)`` tuple: 400 for a malformed uuid, 404
        for an unknown tracker or a user-only tracker owned by someone else
        (unless the caller has the 'admin' role).
    """
    if not is_valid_uuid_v4(term_uuid):
        return ({"status": "error", "reason": "Invalid uuid"}, 400)

    tracker_key = 'tracker:{}'.format(term_uuid)
    level = r_serv_term.hget(tracker_key, 'level')
    if not level:
        return ({"status": "error", "reason": "Unknown uuid"}, 404)

    # The level is read back from redis as a string, as in delete_term();
    # comparing it with the integer 0 would never match and would leave
    # user-only trackers readable by everybody.
    if level == '0':
        if r_serv_term.hget(tracker_key, 'user_id') != user_id:
            if not is_in_role(user_id, 'admin'):
                return ({"status": "error", "reason": "Unknown uuid"}, 404)
    return None
def is_valid_mail(email):
    """Return True when ``email`` matches ``email_regex`` from its start."""
    return bool(email_regex.match(email))
def verify_mail_list(mail_list):
    """Validate every address of ``mail_list``.

    :return: ``None`` when all addresses are valid, otherwise an
        ``(error_dict, 400)`` tuple naming the first invalid address.
    """
    for address in mail_list:
        if is_valid_mail(address):
            continue
        return ({'status': 'error', 'reason': 'Invalid email', 'value': address}, 400)
    return None
def is_valid_regex(term_regex):
    """Return True when ``term_regex`` compiles as a regular expression.

    :param term_regex: pattern string to check
    :return: bool; False for an invalid pattern or a non-string value
    """
    try:
        re.compile(term_regex)
    except (re.error, TypeError):
        return False
    return True
def get_text_word_frequency(item_content, filtering=True):
    """Count how many times each token occurs in ``item_content``.

    The text is lowercased first. With ``filtering`` enabled the module-level
    ``tokenizer`` is used to split it; otherwise TextBlob's default
    tokenizer is used.

    :return: ``defaultdict(int)`` mapping token -> number of occurrences
    """
    item_content = item_content.lower()
    words_dict = defaultdict(int)

    if filtering:
        blob = TextBlob(item_content, tokenizer=tokenizer)
    else:
        blob = TextBlob(item_content)
    for word in blob.tokens:
        words_dict[word] += 1
    return words_dict
# # TODO: create all tracked words
def get_tracked_words_list():
    """Return the members of the 'all:tracker:word' set as a list."""
    tracked_words = r_serv_term.smembers('all:tracker:word')
    return list(tracked_words)
def get_set_tracked_words_list():
    """Parse every member of the 'all:tracker:set' set.

    Members have the form ``word1,word2,...;N``.

    :return: list of ``(list_of_words, N as int, raw member)`` tuples
    """
    parsed_sets = []
    for raw_set in r_serv_term.smembers('all:tracker:set'):
        fields = raw_set.split(';')
        parsed_sets.append((fields[0].split(','), int(fields[1]), raw_set))
    return parsed_sets
def get_regex_tracked_words_dict():
    """Map every member of 'all:tracker:regex' to its compiled pattern."""
    return {
        pattern: re.compile(pattern)
        for pattern in r_serv_term.smembers('all:tracker:regex')
    }
def get_tracked_term_list_item(term_uuid, date_from, date_to):
    """List the item ids stored for a tracker between two dates.

    The dates present in ``tracker:stat:<uuid>`` with a score between
    ``int(date_from)`` and ``int(date_to)`` are read, then the matching
    ``tracker:item:<uuid>:<date>`` sets are concatenated.

    :return: list of item ids; empty when either bound is missing
    """
    found_items = []
    if not (date_from and date_to):
        return found_items

    stat_key = 'tracker:stat:{}'.format(term_uuid)
    for day in r_serv_term.zrangebyscore(stat_key, int(date_from), int(date_to)):
        found_items.extend(r_serv_term.smembers('tracker:item:{}:{}'.format(term_uuid, day)))
    return found_items
def is_term_tracked_in_global_level(term, term_type):
    """Return True if a tracker of this term/type is stored with level '1'."""
    tracker_uuids = r_serv_term.smembers('all:tracker_uuid:{}:{}'.format(term_type, term)) or ()
    return any(
        r_serv_term.hget('tracker:{}'.format(tracker_uuid), 'level') == '1'
        for tracker_uuid in tracker_uuids
    )
def is_term_tracked_in_user_level(term, term_type, user_id):
    """Return True if ``user_id`` already has a tracker for this term/type."""
    user_trackers = r_serv_term.smembers('user:tracker:{}'.format(user_id)) or ()
    for tracker_uuid in user_trackers:
        meta_key = 'tracker:{}'.format(tracker_uuid)
        if r_serv_term.hget(meta_key, 'tracked') != term:
            continue
        if r_serv_term.hget(meta_key, 'type') == term_type:
            return True
    return False
def parse_json_term_to_add(dict_input, user_id):
    """Validate a tracker creation request and create the tracker.

    Keys read from ``dict_input``: ``term`` and ``type`` (mandatory),
    ``nb_words``, ``tags``, ``mails`` and ``level`` (optional).

    :param dict_input: request payload
    :param user_id: id of the user creating the tracker
    :return: ``({'term', 'type', 'uuid'}, 200)`` on success, otherwise an
        ``(error_dict, status)`` tuple (400 invalid input, 409 already
        tracked)
    """
    term = dict_input.get('term', None)
    if not term:
        return ({"status": "error", "reason": "Term not provided"}, 400)
    term_type = dict_input.get('type', None)
    if not term_type:
        return ({"status": "error", "reason": "Term type not provided"}, 400)
    nb_words = dict_input.get('nb_words', 1)

    res = parse_tracked_term_to_add(term, term_type, nb_words=nb_words)
    if res[1] != 200:
        return res
    # use the normalized term/type (a multi-word 'word' becomes a 'set')
    term = res[0]['term']
    term_type = res[0]['type']

    tags = dict_input.get('tags', [])
    mails = dict_input.get('mails', [])
    res = verify_mail_list(mails)
    if res:
        return res

    ## TODO: add dashboard key
    # visibility level: 0 = user only, 1 = all users; anything else -> 1
    try:
        level = int(dict_input.get('level', 1))
    except (TypeError, ValueError):
        level = 1
    if level not in (0, 1):
        level = 1

    # refuse duplicates within the requested visibility level
    if level == 1:
        if is_term_tracked_in_global_level(term, term_type):
            return ({"status": "error", "reason": "Term already tracked"}, 409)
    else:
        if is_term_tracked_in_user_level(term, term_type, user_id):
            return ({"status": "error", "reason": "Term already tracked"}, 409)

    term_uuid = add_tracked_term(term, term_type, user_id, level, tags, mails)

    return ({'term': term, 'type': term_type, 'uuid': term_uuid}, 200)
def parse_tracked_term_to_add(term, term_type, nb_words=1):
    """Validate and normalize a term before it is tracked.

    * ``regex``: the pattern must compile.
    * ``word`` / ``set``: the term is lowercased and must not contain any
      character of ``special_characters``. A 'word' made of several words is
      turned into a 'set'. A set is stored as ``word1,word2,...;N`` (unique
      words, sorted), where N is ``nb_words`` clamped between 1 and the
      number of unique words.

    :return: ``({'status': 'success', 'term', 'type'}, 200)`` or an
        ``(error_dict, 400)`` tuple
    """
    if term_type == 'regex':
        if not is_valid_regex(term):
            return ({"status": "error", "reason": "Invalid regex"}, 400)

    elif term_type in ('word', 'set'):
        # force lowercase
        term = term.lower()
        if special_characters.intersection(term):
            return ({"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}, 400)
        words = term.split()
        # not a word
        if term_type == 'word' and len(words) > 1:
            term_type = 'set'

        # output format: term1,term2,term3;2
        if term_type == 'set':
            try:
                nb_words = int(nb_words)
            except (TypeError, ValueError):
                nb_words = 1

            words_set = sorted(set(words))
            # clamp BEFORE formatting, otherwise the stored threshold could
            # exceed the number of words and the set could never match
            nb_words = max(1, min(nb_words, len(words_set)))

            term = "{};{}".format(",".join(words_set), nb_words)

    else:
        return ({"status": "error", "reason": "Incorrect type"}, 400)
    return ({"status": "success", "term": term, "type": term_type}, 200)
def add_tracked_term(term, term_type, user_id, level, tags, mails, dashboard=0):
    """Create a tracker in redis and return its new uuid.

    Writes the ``tracker:<uuid>`` metadata hash, registers the term in the
    ``all:tracker:<type>`` and ``all:tracker_uuid:<type>:<term>`` sets, adds
    the uuid to the user (level 0) or global (level 1) sets, stores the tags
    and mails, then updates ``tracker:refresh:<type>`` with the current time.

    :return: uuid of the created tracker (str)
    """
    term_uuid = str(uuid.uuid4())
    meta_key = 'tracker:{}'.format(term_uuid)

    # create metadata
    r_serv_term.hset(meta_key, 'tracked', term)
    r_serv_term.hset(meta_key, 'type', term_type)
    r_serv_term.hset(meta_key, 'date', datetime.date.today().strftime("%Y%m%d"))
    r_serv_term.hset(meta_key, 'user_id', user_id)
    r_serv_term.hset(meta_key, 'level', level)
    r_serv_term.hset(meta_key, 'dashboard', dashboard)

    # create all term set
    r_serv_term.sadd('all:tracker:{}'.format(term_type), term)

    # create term - uuid map
    r_serv_term.sadd('all:tracker_uuid:{}:{}'.format(term_type, term), term_uuid)

    # add display level set
    if level == 0:  # user only
        r_serv_term.sadd('user:tracker:{}'.format(user_id), term_uuid)
        r_serv_term.sadd('user:tracker:{}:{}'.format(user_id, term_type), term_uuid)
    elif level == 1:  # global
        r_serv_term.sadd('global:tracker', term_uuid)
        r_serv_term.sadd('global:tracker:{}'.format(term_type), term_uuid)

    # create term tags list
    for tag in tags:
        r_serv_term.sadd('tracker:tags:{}'.format(term_uuid), tag)

    # create term mail notification list
    for mail in mails:
        r_serv_term.sadd('tracker:mail:{}'.format(term_uuid), mail)

    # toggle refresh module tracker list/set
    r_serv_term.set('tracker:refresh:{}'.format(term_type), time.time())

    return term_uuid
def parse_tracked_term_to_delete(dict_input, user_id):
    """Validate a tracker deletion request.

    :return: the error tuple from ``check_term_uuid_valid_access`` when the
        check fails, otherwise ``({'uuid': <uuid>}, 200)``
    """
    term_uuid = dict_input.get("uuid", None)
    access_error = check_term_uuid_valid_access(term_uuid, user_id)
    return access_error if access_error else ({"uuid": term_uuid}, 200)
def delete_term(term_uuid):
    """Delete a tracker and every redis key attached to it.

    Removes the uuid from the term/uuid map (and the term itself from
    ``all:tracker:<type>`` once no uuid tracks it any more), from the user or
    global sets, then deletes the metadata hash, tags, mails, per-day item
    sets and the per-day stat zset.
    """
    meta_key = 'tracker:{}'.format(term_uuid)
    term = r_serv_term.hget(meta_key, 'tracked')
    term_type = r_serv_term.hget(meta_key, 'type')
    level = r_serv_term.hget(meta_key, 'level')

    uuid_map_key = 'all:tracker_uuid:{}:{}'.format(term_type, term)
    r_serv_term.srem(uuid_map_key, term_uuid)
    # Term not tracked by other users
    if not r_serv_term.exists(uuid_map_key):
        r_serv_term.srem('all:tracker:{}'.format(term_type), term)

        # toggle refresh module tracker list/set
        r_serv_term.set('tracker:refresh:{}'.format(term_type), time.time())

    if level == '0':  # user only
        # keep term_type intact: it is part of the per-type key below
        user_id = r_serv_term.hget(meta_key, 'user_id')
        r_serv_term.srem('user:tracker:{}'.format(user_id), term_uuid)
        r_serv_term.srem('user:tracker:{}:{}'.format(user_id, term_type), term_uuid)
    elif level == '1':  # global
        r_serv_term.srem('global:tracker', term_uuid)
        r_serv_term.srem('global:tracker:{}'.format(term_type), term_uuid)

    # delete metadata
    r_serv_term.delete(meta_key)

    # remove tags
    r_serv_term.delete('tracker:tags:{}'.format(term_uuid))

    # remove mails
    r_serv_term.delete('tracker:mail:{}'.format(term_uuid))

    # remove item sets, then the zset listing their dates
    stat_key = 'tracker:stat:{}'.format(term_uuid)
    for date in r_serv_term.zrange(stat_key, 0, -1):
        r_serv_term.delete('tracker:item:{}:{}'.format(term_uuid, date))
    r_serv_term.delete(stat_key)
def replace_tracked_term_tags(term_uuid, tags):
    """Replace the tags of a tracker by ``tags``.

    The existing ``tracker:tags:<uuid>`` set is dropped first so that tags
    absent from ``tags`` do not survive the replacement.
    """
    tags_key = 'tracker:tags:{}'.format(term_uuid)
    r_serv_term.delete(tags_key)
    for tag in tags:
        r_serv_term.sadd(tags_key, tag)
def replace_tracked_term_mails(term_uuid, mails):
    """Replace the notification addresses of a tracker by ``mails``.

    The addresses are validated first; the stored set is only touched when
    all of them are valid. The existing ``tracker:mail:<uuid>`` set is then
    dropped so that old addresses do not survive the replacement.

    :return: ``None`` on success, or the ``(error_dict, 400)`` tuple from
        ``verify_mail_list``
    """
    res = verify_mail_list(mails)
    if res:
        return res

    mails_key = 'tracker:mail:{}'.format(term_uuid)
    r_serv_term.delete(mails_key)
    for mail in mails:
        r_serv_term.sadd(mails_key, mail)
def get_term_uuid_list(term, term_type):
    """Return the tracker uuids registered for this term and type."""
    uuid_map_key = 'all:tracker_uuid:{}:{}'.format(term_type, term)
    return list(r_serv_term.smembers(uuid_map_key))
def get_term_tags(term_uuid):
    """Return the tags stored in ``tracker:tags:<uuid>`` as a list."""
    tags_key = 'tracker:tags:{}'.format(term_uuid)
    return list(r_serv_term.smembers(tags_key))
def get_term_mails(term_uuid):
    """Return the addresses stored in ``tracker:mail:<uuid>`` as a list."""
    mails_key = 'tracker:mail:{}'.format(term_uuid)
    return list(r_serv_term.smembers(mails_key))
def add_tracked_item(term_uuid, item_id, item_date):
    """Attach an item to a tracker for the given date.

    The item id goes into ``tracker:item:<uuid>:<date>`` and the date is
    recorded in the ``tracker:stat:<uuid>`` sorted set.
    """
    item_set_key = 'tracker:item:{}:{}'.format(term_uuid, item_date)
    stat_key = 'tracker:stat:{}'.format(term_uuid)
    # track item
    r_serv_term.sadd(item_set_key, item_id)
    # track nb item by date
    r_serv_term.zadd(stat_key, item_date, int(item_date))
def create_token_statistics(item_date, word, nb):
    """Update the per-day token statistics for one word of one item.

    Increments the word by 1 in ``stat_token_per_item_by_day:<date>``, by
    ``nb`` in ``stat_token_total_by_day:<date>``, and records the date in
    'stat_token_history'.
    """
    per_item_key = 'stat_token_per_item_by_day:{}'.format(item_date)
    total_key = 'stat_token_total_by_day:{}'.format(item_date)
    r_serv_term.zincrby(per_item_key, word, 1)
    r_serv_term.zincrby(total_key, word, nb)
    r_serv_term.sadd('stat_token_history', item_date)
def delete_token_statistics_by_date(item_date):
    """Delete the token statistics stored for ``item_date``.

    Removes both per-day sorted sets written by ``create_token_statistics``
    and drops the date from 'stat_token_history'.
    """
    r_serv_term.delete('stat_token_per_item_by_day:{}'.format(item_date))
    r_serv_term.delete('stat_token_total_by_day:{}'.format(item_date))
    r_serv_term.srem('stat_token_history', item_date)
def get_all_token_stat_history():
    """Return the members of the 'stat_token_history' set."""
    history = r_serv_term.smembers('stat_token_history')
    return history
def get_tracked_term_last_updated_by_type(term_type):
    """Return the value of ``tracker:refresh:<type>`` as a float.

    A missing (or empty) value yields ``0.0``.
    """
    refresh_epoch = r_serv_term.get('tracker:refresh:{}'.format(term_type))
    return float(refresh_epoch or 0)
def parse_get_tracker_term_item(dict_input, user_id):
    """Return the items stored for a tracker over a date range.

    Keys read from ``dict_input``: ``uuid`` (mandatory), ``date_from`` and
    ``date_to`` (optional). ``date_from`` defaults to the first date the
    tracker was seen, ``date_to`` defaults to ``date_from``. When the range
    is reversed, ``date_from`` is lowered to ``date_to``.

    :return: ``({'uuid', 'date_from', 'date_to', 'items'}, 200)`` or the
        error tuple from ``check_term_uuid_valid_access``
    """
    term_uuid = dict_input.get('uuid', None)
    res = check_term_uuid_valid_access(term_uuid, user_id)
    if res:
        return res

    date_from = dict_input.get('date_from', None)
    date_to = dict_input.get('date_to', None)

    if date_from is None:
        # get_tracked_term_first_seen() returns a single date (or None), so
        # it must not be indexed again
        date_from = get_tracked_term_first_seen(term_uuid)

    if date_to is None:
        date_to = date_from

    # a tracker never seen leaves both bounds empty: nothing to compare
    if date_from and date_to and int(date_from) > int(date_to):
        date_from = date_to

    all_item_id = get_tracked_term_list_item(term_uuid, date_from, date_to)
    all_item_id = Item.get_item_list_desc(all_item_id)

    res_dict = {}
    res_dict['uuid'] = term_uuid
    res_dict['date_from'] = date_from
    res_dict['date_to'] = date_to
    res_dict['items'] = all_item_id
    return (res_dict, 200)
def get_tracked_term_first_seen(term_uuid):
    """Return the lowest-ranked member of ``tracker:stat:<uuid>``, or None."""
    first = r_serv_term.zrange('tracker:stat:{}'.format(term_uuid), 0, 0)
    return first[0] if first else None
def get_tracked_term_last_seen(term_uuid):
    """Return the highest-ranked member of ``tracker:stat:<uuid>``, or None."""
    last = r_serv_term.zrevrange('tracker:stat:{}'.format(term_uuid), 0, 0)
    return last[0] if last else None
def get_term_metedata(term_uuid, user_id=False, level=False, tags=False, mails=False, sparkline=False):
    """Collect the metadata of a tracker into a dict.

    Always present: 'term', 'type', 'date', 'first_seen', 'last_seen' and
    'uuid'. The boolean flags add 'user_id', 'level', 'mails', 'tags' and
    'sparkline' respectively.
    """
    meta_key = 'tracker:{}'.format(term_uuid)
    metadata = {
        'term': r_serv_term.hget(meta_key, 'tracked'),
        'type': r_serv_term.hget(meta_key, 'type'),
        'date': r_serv_term.hget(meta_key, 'date'),
        'first_seen': get_tracked_term_first_seen(term_uuid),
        'last_seen': get_tracked_term_last_seen(term_uuid),
    }
    # optional fields, on request
    if user_id:
        metadata['user_id'] = r_serv_term.hget(meta_key, 'user_id')
    if level:
        metadata['level'] = r_serv_term.hget(meta_key, 'level')
    if mails:
        metadata['mails'] = get_list_trackeed_term_mails(term_uuid)
    if tags:
        metadata['tags'] = get_list_trackeed_term_tags(term_uuid)
    if sparkline:
        metadata['sparkline'] = get_tracked_term_sparkline(term_uuid)
    metadata['uuid'] = term_uuid
    return metadata
def get_tracked_term_sparkline(tracker_uuid, num_day=6):
    """Return the number of items stored for a tracker on each recent day.

    One count is produced per date returned by
    ``Date.get_date_range(num_day)``, in that order.

    :return: list of ints
    """
    sparklines_value = []
    for date_day in Date.get_date_range(num_day):
        nb_seen_this_day = r_serv_term.scard('tracker:item:{}:{}'.format(tracker_uuid, date_day))
        # record the count for this day (0 when nothing was stored)
        sparklines_value.append(int(nb_seen_this_day or 0))
    return sparklines_value
def get_list_tracked_term_stats_by_day(list_tracker_uuid, num_day=31, date_from=None, date_to=None):
    """Build per-day item counts for several trackers.

    The days come from ``Date.substract_date(date_from, date_to)`` when both
    bounds are given, otherwise from ``Date.get_date_range(num_day)``.

    :return: list of ``{"name": <tracked term>, "Data": [{"date", "value"}]}``
    """
    if date_from and date_to:
        date_range = Date.substract_date(date_from, date_to)
    else:
        date_range = Date.get_date_range(num_day)

    list_tracker_stats = []
    for tracker_uuid in list_tracker_uuid:
        dict_tracker_data = []
        tracker = r_serv_term.hget('tracker:{}'.format(tracker_uuid), 'tracked')
        for date_day in date_range:
            nb_seen_this_day = r_serv_term.scard('tracker:item:{}:{}'.format(tracker_uuid, date_day))
            if nb_seen_this_day is None:
                nb_seen_this_day = 0
            dict_tracker_data.append({"date": date_day, "value": int(nb_seen_this_day)})
        list_tracker_stats.append({"name": tracker, "Data": dict_tracker_data})
    return list_tracker_stats
def get_list_trackeed_term_tags(term_uuid):
    """Return the tags of a tracker as a list (empty when there are none)."""
    stored_tags = r_serv_term.smembers('tracker:tags:{}'.format(term_uuid))
    return list(stored_tags) if stored_tags else []
def get_list_trackeed_term_mails(term_uuid):
    """Return the mails of a tracker as a list (empty when there are none)."""
    stored_mails = r_serv_term.smembers('tracker:mail:{}'.format(term_uuid))
    return list(stored_mails) if stored_mails else []
def get_user_tracked_term_uuid(user_id, filter_type=None):
    """List the tracker uuids of a user, optionally for one tracker type."""
    set_key = 'user:tracker:{}'.format(user_id)
    if filter_type:
        set_key = 'user:tracker:{}:{}'.format(user_id, filter_type)
    return list(r_serv_term.smembers(set_key))
def get_global_tracked_term_uuid(filter_type=None):
    """List the global tracker uuids, optionally for one tracker type."""
    set_key = 'global:tracker'
    if filter_type:
        set_key = 'global:tracker:{}'.format(filter_type)
    return list(r_serv_term.smembers(set_key))
def get_all_user_tracked_terms(user_id, filter_type=None):
    """Return the metadata (with tags, mails, sparkline) of a user's trackers."""
    return [
        get_term_metedata(term_uuid, tags=True, mails=True, sparkline=True)
        for term_uuid in get_user_tracked_term_uuid(user_id, filter_type=filter_type)
    ]
def get_all_global_tracked_terms(filter_type=None):
    """Return the metadata (with user_id, tags, mails, sparkline) of global trackers."""
    return [
        get_term_metedata(term_uuid, user_id=True, tags=True, mails=True, sparkline=True)
        for term_uuid in get_global_tracked_term_uuid(filter_type=filter_type)
    ]

View File

@ -23,7 +23,7 @@ sentiment_lexicon_file = sentiment/
##### Notifications ######
ail_domain = http://localhost:7000
ail_domain = https://localhost:7000
sender =
sender_host =
sender_port = 1337
@ -107,7 +107,10 @@ operation_mode = 3
ttl_duplicate = 86400
default_unnamed_feed_name = unnamed_feeder
max_execution_time = 120
max_execution_time = 60
##### Redis #####
@ -177,6 +180,11 @@ host = localhost
port = 6382
db = 3
host = localhost
port = 6382
db = 3
host = localhost
db = 1

View File

@ -11,62 +11,10 @@ from dateutil.rrule import rrule, DAILY
import csv
def listdirectory(path):
"""Path Traversing Function.
:param path: -- The absolute pathname to a directory.
This function is returning all the absolute path of the files contained in
the argument directory.
fichier = []
for root, dirs, files in os.walk(path):
for i in files:
fichier.append(os.path.join(root, i))
return fichier
clean = lambda dirty: ''.join(filter(string.printable.__contains__, dirty))
"""It filters out non-printable characters from the string it receives."""
def create_dirfile(r_serv, directory, overwrite):
"""Create a file of path.
:param r_serv: -- connexion to redis database
:param directory: -- The folder where to launch the listing of the .gz files
This function create a list in redis with inside the absolute path
of all the pastes needed to be proceeded by function using parallel
(like redis_words_ranking)
if overwrite:
for x in listdirectory(directory):
r_serv.lpush("filelist", x)"The list was overwritten")
if r_serv.llen("filelist") == 0:
for x in listdirectory(directory):
r_serv.lpush("filelist", x)"New list created")
for x in listdirectory(directory):
r_serv.lpush("filelist", x)"The list was updated with new elements")
def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month):
"""Create a csv file used with dygraph.

View File

@ -19,32 +19,17 @@ subscribe = Redis_Global
subscribe = Redis_Global
subscribe = Redis_Global
publish = Redis_LinesShort,Redis_LinesLong
subscribe = Redis_Global
subscribe = Redis_LinesShort
publish = Redis_Words
subscribe = Redis_Words
publish = Redis_CurveManageTopSets,Redis_Tags
subscribe = Redis_Global
publish = Redis_Tags
subscribe = Redis_Global
publish = Redis_Tags
subscribe = Redis_CurveManageTopSets
subscribe = Redis_Global
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey

View File

@ -630,9 +630,6 @@ curl --header "Authorization: iHc
## Cryptocurrency
@ -743,6 +740,202 @@ curl --header "Aut
## Tracker
### Add term tracker: `api/v1/add/tracker/term`<a name="add_term_tracker"></a>
#### Description
Add term tracker
**Method** : `POST`
#### Parameters
- `term`
- term to add
- *str - word(s)*
- mandatory
- `nb_words`
- minimum number of words of the set that must be found in an item (only used for `set`)
- *int*
- default: `1`
- `type`
- term type
- *str*
- mandatory: `word`, `set`, `regex`
- `tags`
- list of tags
- *list*
- default: `[]`
- `mails`
- list of mails to notify
- *list*
- default: `[]`
- `level`
- tracker visibility
- *int - 0: user only, 1: all users*
- default: `1`
#### JSON response
- `uuid`
- tracked term uuid
- *uuid4*
#### Example
curl --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST
#### input.json Example
#### Expected Success Response
**HTTP Status Code** : `200`
#### Expected Fail Response
**HTTP Status Code** : `400`
### Delete term tracker: `api/v1/delete/tracker/term`<a name="delete_term_tracker"></a>
#### Description
Delete term tracker
**Method** : `DELETE`
#### Parameters
- `uuid`
- tracked term uuid
- *uuid4*
- mandatory
#### JSON response
- `uuid`
- deleted uuid
- *uuid4*
#### Example
curl --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X DELETE
#### input.json Example
#### Expected Success Response
**HTTP Status Code** : `200`
#### Expected Fail Response
**HTTP Status Code** : `400`
### Get tracked term items: `api/v1/delete/tracker/term/item`<a name="get_term_tracker_item"></a>
#### Description
Get the items in which a tracked term was found
**Method** : `POST`
#### Parameters
- `uuid`
- tracked term uuid
- *uuid4*
- mandatory
- `date_from`
- date from
- *str - YYYYMMDD*
- default: date the tracked term was first seen
- `date_to`
- date to
- *str - YYYYMMDD*
- default: `date_from`
#### JSON response
- `uuid`
- term uuid
- *uuid4*
- `date_from`
- date from
- *str - YYYYMMDD*
- `date_to`
- date to
- *str - YYYYMMDD*
- `items`
- list of item id
- *list*
#### Example
curl --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST
#### input.json Example
#### Expected Success Response
**HTTP Status Code** : `200`
#### Expected Fail Response
**HTTP Status Code** : `400`
## Import management

View File

@ -20,8 +20,8 @@ export PATH=$AIL_FLASK:$PATH
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/ -k
echo -e $GREEN"Shutting down AIL Script ..."$DEFAULT
bash ${AIL_BIN}/ -ks
echo ""
@ -37,8 +37,8 @@ echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/ -k
echo -e $GREEN"Killing Script ..."$DEFAULT
bash ${AIL_BIN}/ -ks
echo ""

update/v2.2/ Executable file
View File

@ -0,0 +1,122 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import re
import sys
import time
import redis
import datetime
import configparser
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
import Term
def rreplace(s, old, new, occurrence):
    """Replace the last ``occurrence`` occurrences of ``old`` in ``s`` by ``new``."""
    pieces = s.rsplit(old, occurrence)
    return new.join(pieces)
if __name__ == '__main__':
start_deb = time.time()
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample')
if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = configparser.ConfigParser()
r_serv_term_stats = redis.StrictRedis(
host=cfg.get("ARDB_Trending", "host"),
port=cfg.getint("ARDB_Trending", "port"),
db=cfg.getint("ARDB_Trending", "db"),
r_serv_termfreq = redis.StrictRedis(
host=cfg.get("ARDB_TermFreq", "host"),
port=cfg.getint("ARDB_TermFreq", "port"),
db=cfg.getint("ARDB_TermFreq", "db"),
#convert all regex:
all_regex = r_serv_termfreq.smembers('TrackedRegexSet')
for regex in all_regex:
tags = list( r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(regex)) )
mails = list( r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(regex)) )
new_term = regex[1:-1]
res = Term.parse_json_term_to_add({"term": new_term, "type": 'regex', "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
if res[1] == 200:
term_uuid = res[0]['uuid']
list_items = r_serv_termfreq.smembers('regex_{}'.format(regex))
for paste_item in list_items:
item_id = Item.get_item_id(paste_item)
item_date = Item.get_item_date(item_id)
Term.add_tracked_item(term_uuid, item_id, item_date)
# Invalid Tracker => remove it
print('Invalid Regex Removed: {}'.format(regex))
# allow reprocess
r_serv_termfreq.srem('TrackedRegexSet', regex)
all_tokens = r_serv_termfreq.smembers('TrackedSetTermSet')
for token in all_tokens:
tags = list( r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(token)) )
mails = list( r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(token)) )
res = Term.parse_json_term_to_add({"term": token, "type": 'word', "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
if res[1] == 200:
term_uuid = res[0]['uuid']
list_items = r_serv_termfreq.smembers('tracked_{}'.format(token))
for paste_item in list_items:
item_id = Item.get_item_id(paste_item)
item_date = Item.get_item_date(item_id)
Term.add_tracked_item(term_uuid, item_id, item_date)
# Invalid Tracker => remove it
print('Invalid Token Removed: {}'.format(token))
# allow reprocess
r_serv_termfreq.srem('TrackedSetTermSet', token)
all_set = r_serv_termfreq.smembers('TrackedSetSet')
for curr_set in all_set:
tags = list( r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(curr_set)) )
mails = list( r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(curr_set)) )
to_remove = ',{}'.format(curr_set.split(',')[-1])
new_set = rreplace(curr_set, to_remove, '', 1)
new_set = new_set[2:]
new_set = new_set.replace(',', '')
res = Term.parse_json_term_to_add({"term": new_set, "type": 'set', "nb_words": 1, "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
if res[1] == 200:
term_uuid = res[0]['uuid']
list_items = r_serv_termfreq.smembers('tracked_{}'.format(curr_set))
for paste_item in list_items:
item_id = Item.get_item_id(paste_item)
item_date = Item.get_item_date(item_id)
Term.add_tracked_item(term_uuid, item_id, item_date)
# Invalid Tracker => remove it
print('Invalid Set Removed: {}'.format(curr_set))
# allow reprocess
r_serv_termfreq.srem('TrackedSetSet', curr_set)
#Set current ail version
r_serv.set('ail:version', 'v2.2')
#Set current ail version
r_serv.hset('ail:update_date', 'v2.2',"%Y%m%d"))

View File

@ -61,9 +61,9 @@ r_serv_sentiment = redis.StrictRedis(
r_serv_term = redis.StrictRedis(
host=cfg.get("ARDB_TermFreq", "host"),
port=cfg.getint("ARDB_TermFreq", "port"),
db=cfg.getint("ARDB_TermFreq", "db"),
host=cfg.get("ARDB_Tracker", "host"),
port=cfg.getint("ARDB_Tracker", "port"),
db=cfg.getint("ARDB_Tracker", "db"),
r_serv_cred = redis.StrictRedis(

View File

@ -20,6 +20,7 @@
<script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/plugins/d3/sparklines.js')}}"></script>
.input-group .form-control {
@ -246,45 +247,10 @@ function toggle_sidebar(){
// a sparklines plot
function sparklines(id, points) {
var width = 100, height = 60;
var data = []
for (i = 0; i < points.length; i++) {
data[i] = {
'x': i,
'y': +points[i]
var x = d3.scaleLinear()
.range([0, width - 10])
var y = d3.scaleLinear()
.range([height, 0])
var line = d3.line()
.x(function(d) {return x(d.x)})
.y(function(d) {return y(d.y)});"#"+id).append('svg')
.attr('width', width)
.attr('height', height)
.attr('d', line);
{% for key_id in all_metadata %}
sparklines("sparklines_{{ all_metadata[key_id]['sparklines_id'] }}", {{ all_metadata[key_id]['sparklines_data'] }})
sparkline("sparklines_{{ all_metadata[key_id]['sparklines_id'] }}", {{ all_metadata[key_id]['sparklines_data'] }}, {});
{% endfor %}

View File

@ -20,6 +20,7 @@
<script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/plugins/d3/sparklines.js')}}"></script>
.input-group .form-control {
@ -296,46 +297,10 @@ function toggle_sidebar(){
//var data = [6,3,3,2,5,3,9];
// a sparklines plot
function sparklines(id, points) {
var width = 100, height = 60;
var data = []
for (i = 0; i < points.length; i++) {
data[i] = {
'x': i,
'y': +points[i]
var x = d3.scaleLinear()
.range([0, width - 10])
var y = d3.scaleLinear()
.range([height, 0])
var line = d3.line()
.x(function(d) {return x(d.x)})
.y(function(d) {return y(d.y)});"#"+id).append('svg')
.attr('width', width)
.attr('height', height)
.attr('d', line);
{% for b64 in l_64 %}
sparklines("sparklines_{{ b64[2] }}", {{ b64[10] }})
sparkline("sparklines_{{ b64[2] }}", {{ b64[10] }}, {});
{% endfor %}

View File

@ -16,6 +16,7 @@
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/plugins/d3/sparklines.js')}}"></script>
<style> {
@ -25,11 +26,6 @@
stroke: red;
stroke-width: 2px
.line_sparkline {
fill: none;
stroke: #000;
stroke-width: 2.0px;
.node {
pointer-events: all;
@ -179,7 +175,7 @@
var all_graph = {};
sparklines("sparkline", {{ sparkline_values }})
sparkline("sparkline", {{ sparkline_values }}, {});
all_graph.node_graph = create_graph("{{ url_for(graph_node_endpoint) }}?type_id={{type_id}}&key_id={{key_id}}");
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for(graph_line_endpoint) }}?type_id={{type_id}}&key_id={{key_id}}");
@ -212,43 +208,6 @@ function toggle_sidebar(){
//var data = [6,3,3,2,5,3,9];
// a sparklines plot
function sparklines(id, points) {
var width_spark = 100, height_spark = 60;
var data = []
for (i = 0; i < points.length; i++) {
data[i] = {
'x': i,
'y': +points[i]
var x = d3.scaleLinear()
.range([0, width_spark - 10])
var y = d3.scaleLinear()
.range([height_spark, 0])
var line = d3.line()
.x(function(d) {return x(d.x)})
.y(function(d) {return y(d.y)});"#"+id).append('svg')
.attr('width', width_spark)
.attr('height', height_spark)
.attr('d', line);
var width = 400,

View File

@ -16,6 +16,7 @@
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/plugins/d3/sparklines.js')}}"></script>
<style> {
@ -25,11 +26,6 @@
stroke: red;
stroke-width: 2px
.line_sparkline {
fill: none;
stroke: #000;
stroke-width: 2.0px;
.node {
pointer-events: all;
@ -234,7 +230,7 @@
var all_graph = {};
sparklines("sparkline", {{ sparkline_values }})
sparkline("sparkline", {{ sparkline_values }}, {});
all_graph.node_graph = create_graph("{{ url_for('hashDecoded.hash_graph_node_json') }}?hash={{hash}}");
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.hash_graph_line_json') }}?hash={{hash}}");
@ -288,43 +284,6 @@ function toggle_sidebar(){
//var data = [6,3,3,2,5,3,9];
// a sparklines plot
function sparklines(id, points) {
var width_spark = 100, height_spark = 60;
var data = []
for (i = 0; i < points.length; i++) {
data[i] = {
'x': i,
'y': +points[i]
var x = d3.scaleLinear()
.range([0, width_spark - 10])
var y = d3.scaleLinear()
.range([height_spark, 0])
var line = d3.line()
.x(function(d) {return x(d.x)})
.y(function(d) {return y(d.y)});"#"+id).append('svg')
.attr('width', width_spark)
.attr('height', height_spark)
.attr('d', line);
var width = 400,

View File

@ -0,0 +1,216 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
    Flask functions and routes for tracked items
'''
import json
import redis
import datetime
import calendar
import flask
from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect, Response
from Role_Manager import login_admin, login_analyst
from flask_login import login_required, current_user
import re
from pprint import pprint
import Levenshtein
# ---------------------------------------------------------------
import Paste
import Term
# ============ VARIABLES ============
import Flask_config
# Flask application object; the 'hunter' blueprint is registered on it at the
# end of this module.
# NOTE(review): right-hand side was truncated in this view; rebuilt to match
# the sibling Flask_config lookups below - confirm.
app = Flask_config.app
cfg = Flask_config.cfg
baseUrl = Flask_config.baseUrl
r_serv_term = Flask_config.r_serv_term
r_serv_cred = Flask_config.r_serv_cred
r_serv_db = Flask_config.r_serv_db
bootstrap_label = Flask_config.bootstrap_label

# Blueprint holding every tracker route defined in this module
hunter = Blueprint('hunter', __name__, template_folder='templates')
# ============ FUNCTIONS ============
# ============ ROUTES ============
def tracked_menu():
    """Render trackersManagement.html with the current user's and the global tracked terms."""
    current_id = current_user.get_id()
    return render_template(
        "trackersManagement.html",
        user_term=Term.get_all_user_tracked_terms(current_id),
        global_term=Term.get_all_global_tracked_terms(),
        bootstrap_label=bootstrap_label,
    )
def tracked_menu_word():
    """Render trackersManagement.html, asking Term for trackers with filter_type 'word'."""
    filter_type = 'word'
    current_id = current_user.get_id()
    template_args = {
        'user_term': Term.get_all_user_tracked_terms(current_id, filter_type=filter_type),
        'global_term': Term.get_all_global_tracked_terms(filter_type=filter_type),
        'bootstrap_label': bootstrap_label,
        'filter_type': filter_type,
    }
    return render_template("trackersManagement.html", **template_args)
def tracked_menu_set():
    """Render trackersManagement.html, asking Term for trackers with filter_type 'set'."""
    filter_type = 'set'
    current_id = current_user.get_id()
    template_args = {
        'user_term': Term.get_all_user_tracked_terms(current_id, filter_type=filter_type),
        'global_term': Term.get_all_global_tracked_terms(filter_type=filter_type),
        'bootstrap_label': bootstrap_label,
        'filter_type': filter_type,
    }
    return render_template("trackersManagement.html", **template_args)
def tracked_menu_regex():
    """Render trackersManagement.html, asking Term for trackers with filter_type 'regex'."""
    filter_type = 'regex'
    current_id = current_user.get_id()
    template_args = {
        'user_term': Term.get_all_user_tracked_terms(current_id, filter_type=filter_type),
        'global_term': Term.get_all_global_tracked_terms(filter_type=filter_type),
        'bootstrap_label': bootstrap_label,
        'filter_type': filter_type,
    }
    return render_template("trackersManagement.html", **template_args)
@hunter.route("/tracker/add", methods=['GET', 'POST'])
def add_tracked_menu():
    """Show the tracker creation form (GET) or create a tracker from it (POST).

    On POST the form fields are packed into a dict and passed, together with
    the current user id, to ``Term.parse_json_term_to_add``. A 200 status
    redirects to ``hunter.tracked_menu``; any other status is sent back as a
    JSON body with that status code.
    """
    if request.method != 'POST':
        return render_template("Add_tracker.html")

    term = request.form.get("term")
    term_type = request.form.get("tracker_type")
    nb_words = request.form.get("nb_word", 1)

    # 'level' comes from a checkbox: 'on' when ticked, absent otherwise
    level = request.form.get("level", 0)
    if level == 'on':
        level = 1

    # An empty text field is submitted as '' (not missing), so always
    # normalise to a list, as the update_tracker_* handlers do.
    tags = (request.form.get("tags") or '').split()
    mails = (request.form.get("mails") or '').split()

    input_dict = {"term": term, "type": term_type, "nb_words": nb_words, "tags": tags, "mails": mails, "level": level}
    user_id = current_user.get_id()
    res = Term.parse_json_term_to_add(input_dict, user_id)
    if res[1] == 200:
        return redirect(url_for('hunter.tracked_menu'))

    ## TODO: use modal
    return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
def show_tracker():
    """Render showTracker.html for the tracker given by the ``uuid`` query argument.

    Access is checked with ``Term.check_term_uuid_valid_access``; a truthy
    result is returned as a JSON error. When ``date_from`` is supplied, the
    tracked items for that range come from ``Term.parse_get_tracker_term_item``;
    otherwise the page is rendered with no items and empty dates.
    """
    user_id = current_user.get_id()
    term_uuid = request.args.get('uuid', None)
    res = Term.check_term_uuid_valid_access(term_uuid, user_id)
    if res:  # invalid access
        return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]

    # Dates arrive as yyyy-mm-dd; strip the dashes before passing them on
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    if date_from:
        date_from = date_from.replace('-', '')
    if date_to:
        date_to = date_to.replace('-', '')

    tracker_metadata = Term.get_term_metedata(term_uuid, user_id=True, level=True, tags=True, mails=True, sparkline=True)

    if date_from:
        res = Term.parse_get_tracker_term_item({'uuid': term_uuid, 'date_from': date_from, 'date_to': date_to}, user_id)
        if res[1] != 200:
            return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
        tracker_metadata['items'] = res[0]['items']
        tracker_metadata['date_from'] = res[0]['date_from']
        tracker_metadata['date_to'] = res[0]['date_to']
    else:
        # No range requested: defaults must not overwrite a fetched result
        tracker_metadata['items'] = []
        tracker_metadata['date_from'] = ''
        tracker_metadata['date_to'] = ''

    return render_template("showTracker.html", tracker_metadata=tracker_metadata, bootstrap_label=bootstrap_label)
@hunter.route("/tracker/update_tracker_tags", methods=['POST'])
def update_tracker_tags():
    """Replace the tags of the tracker given by the ``uuid`` form field.

    A truthy result from ``Term.check_term_uuid_valid_access`` is returned as
    a JSON error. Otherwise the whitespace-separated ``tags`` field replaces
    the tracker's tags and the client is redirected to ``hunter.show_tracker``.
    """
    user_id = current_user.get_id()
    term_uuid = request.form.get('uuid')
    res = Term.check_term_uuid_valid_access(term_uuid, user_id)
    if res:  # invalid access
        return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]

    # A missing or empty field means "no tags"; do not clobber parsed tags
    tags = request.form.get('tags')
    tags = tags.split() if tags else []

    Term.replace_tracked_term_tags(term_uuid, tags)
    return redirect(url_for('hunter.show_tracker', uuid=term_uuid))
@hunter.route("/tracker/update_tracker_mails", methods=['POST'])
def update_tracker_mails():
    """Replace the notification e-mails of the tracker given by the ``uuid`` form field.

    A truthy result from ``Term.check_term_uuid_valid_access`` or from
    ``Term.replace_tracked_term_mails`` is returned as a JSON error. On
    success the client is redirected to ``hunter.show_tracker``.
    """
    user_id = current_user.get_id()
    term_uuid = request.form.get('uuid')
    res = Term.check_term_uuid_valid_access(term_uuid, user_id)
    if res:  # invalid access
        return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]

    # A missing or empty field means "no mails"; do not clobber parsed mails
    mails = request.form.get('mails')
    mails = mails.split() if mails else []

    res = Term.replace_tracked_term_mails(term_uuid, mails)
    if res:  # invalid mail
        return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
    return redirect(url_for('hunter.show_tracker', uuid=term_uuid))
@hunter.route("/tracker/delete", methods=['GET'])
def delete_tracker():
    """Delete the tracker given by the ``uuid`` query argument.

    The request goes to ``Term.parse_tracked_term_to_delete`` with the current
    user id. A 200 status redirects to ``hunter.tracked_menu``; any other
    status is returned as a JSON body with that status code.
    """
    requester = current_user.get_id()
    target_uuid = request.args.get('uuid')
    outcome = Term.parse_tracked_term_to_delete({'uuid': target_uuid}, requester)
    if outcome[1] == 200:
        return redirect(url_for('hunter.tracked_menu'))
    error_body = json.dumps(outcome[0], indent=2, sort_keys=True)
    return Response(error_body, mimetype='application/json'), outcome[1]
@hunter.route("/tracker/get_json_tracker_stats", methods=['GET'])
def get_json_tracker_stats():
    """Return per-day statistics for the tracker given by ``uuid`` as JSON.

    When both ``date_from`` and ``date_to`` are supplied they bound the query;
    otherwise ``Term.get_list_tracked_term_stats_by_day`` is called with the
    uuid only.
    """
    # Dates arrive as yyyy-mm-dd; strip the dashes before passing them on
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    if date_from:
        date_from = date_from.replace('-', '')
    if date_to:
        date_to = date_to.replace('-', '')

    tracker_uuid = request.args.get('uuid')

    # NOTE(review): unlike show_tracker, no access check is visible here - confirm intended.
    if date_from and date_to:
        res = Term.get_list_tracked_term_stats_by_day([tracker_uuid], date_from=date_from, date_to=date_to)
    else:
        # Only fall back to the unbounded query when no full range was given
        res = Term.get_list_tracked_term_stats_by_day([tracker_uuid])
    return jsonify(res)
# ========= REGISTRATION =========
app.register_blueprint(hunter, url_prefix=baseUrl)

View File

@ -0,0 +1,153 @@
<!DOCTYPE html>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'hunter/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card mb-3 mt-1">
<div class="card-header">
<h5 class="card-title">Create a new tracker</h5>
<div class="card-body">
<p class="card-text">Enter the term(s) to track and choose the tracker type.</p>
<form action="{{ url_for('hunter.add_tracked_menu') }}" method='post'>
<div class="row">
<div class="col-12 col-xl-9">
<div class="input-group mb-2 mr-sm-2">
<div class="input-group-prepend">
<div class="input-group-text"><i class="fas fa-tag"></i></div>
<input id="tags" name="tags" class="form-control" placeholder="Tags (optional, space separated)" type="text">
<div class="input-group mb-2 mr-sm-2">
<div class="input-group-prepend">
<div class="input-group-text"><i class="fas fa-at"></i></div>
<input id="mails" name="mails" class="form-control" placeholder="E-Mails Notification (optional, space separated)" type="text">
<div class="col-12 col-xl-3">
<div class="custom-control custom-switch mt-1">
<input class="custom-control-input" type="checkbox" name="level" id="id_level" checked>
<label class="custom-control-label" for="id_level">
<i class="fas fa-users"></i>&nbsp;Show tracker to all Users
<select id="tracker_type" name="tracker_type" class="custom-select w-25 mb-3">
<option disabled selected value> -- Select a tracker type -- </option>
<option value="word">Word</option>
<option value="set">Set</option>
<option value="regex">Regex</option>
<p id="tracker_desc">Terms to track (space separated)</p>
<div class="row">
<div class="col-12 col-lg-10">
<input id="term" name="term" class="form-control" placeholder="Terms to track (space separated)" type="text">
<div class="col-12 col-lg-2">
<input type="number" id="nb_word" name="nb_word" min="1" placeholder="Nb of keywords">
<button class="btn btn-success mt-2">
<i class="fas fa-plus"></i> Add Tracker
var chart = {};
$('#tracker_type').on('change', function() {
var tracker_type = this.value;
if (tracker_type=="word") {
$("#tracker_desc").text("Term to track:");
} else if (tracker_type=="set") {
$("#tracker_desc").text("Terms to track (space separated). Select the numbers of different tokens to trigger this tracker");
} else {
$("#tracker_desc").text("Enter a valid Python regex");
function toggle_sidebar(){

View File

@ -0,0 +1,305 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>AIL Framework - AIL</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/plugins/d3/sparklines.js')}}"></script>
<script src="{{ url_for('static', filename='js/plugins/d3/graphlinesgroup.js')}}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
.btn-link {
color: #000000
cursor: pointer;
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'hunter/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card my-3">
<div class="card-header" style="background-color:#d9edf7;font-size: 15px">
<h4 class="text-secondary">{{ tracker_metadata['uuid'] }} </h4>
<ul class="list-group mb-2">
<li class="list-group-item py-0">
<div class="row">
<div class="col-md-10">
<table class="table">
<th>Date added</th>
<th>Created by</th>
<th>First seen</th>
<th>Last seen</th>
<th>Tags <span class="btn-link btn-interaction mouse_pointer" title="Edit Tags List" onclick="edit_tags();"><i class="fas fa-pencil-alt" style="color:Red;"></i></span></th>
<th>Email <span class="btn-link btn-interaction mouse_pointer" title="Edit Email List" onclick="edit_mails();"><i class="fas fa-pencil-alt" style="color:Red;"></i></span></th>
<td>{{ tracker_metadata['type'] }}</td>
<td>{{ tracker_metadata['term'] }}</td>
<td>{{ tracker_metadata['date'][0:4] }}/{{ tracker_metadata['date'][4:6] }}/{{ tracker_metadata['date'][6:8] }}</td>
<td>{{ tracker_metadata['level'] }}</td>
<td>{{ tracker_metadata['user_id'] }}</td>
{% if tracker_metadata['first_seen'] %}
{{ tracker_metadata['first_seen'][0:4] }}/{{ tracker_metadata['first_seen'][4:6] }}/{{ tracker_metadata['first_seen'][6:8] }}
{% endif %}
{% if tracker_metadata['last_seen'] %}
{{ tracker_metadata['last_seen'][0:4] }}/{{ tracker_metadata['last_seen'][4:6] }}/{{ tracker_metadata['last_seen'][6:8] }}
{% endif %}
{% for tag in tracker_metadata['tags'] %}
<a href="{{ url_for('Tags.Tags_page') }}?ltags={{ tag }}">
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
{% endfor %}
{% for mail in tracker_metadata['mails'] %}
{{ mail }}<br>
{% endfor %}
<div class="col-md-1">
<div id="sparkline"></div>
<div id="div_edit_tags">
<form action="{{ url_for('hunter.update_tracker_tags') }}" method='post'>
<input name="uuid" type="text" value="{{tracker_metadata['uuid']}}" hidden>
<div>All Tags added for this tracker, space separated: </div>
<div class="input-group mb-2 mr-sm-2">
<div class="input-group-prepend">
<div class="input-group-text"><i class="fas fa-tag"></i></div>
<input id="tags" name="tags" class="form-control" placeholder="Tags (optional, space separated)" type="text"
value="{% for tag in tracker_metadata['tags'] %}{{tag}} {% endfor %}">
<button class="btn btn-info">
<i class="fas fa-pencil-alt"></i> Edit Tags
<div id="div_edit_mails">
<form action="{{ url_for('hunter.update_tracker_mails') }}" method='post'>
<input name="uuid" type="text" value="{{tracker_metadata['uuid']}}" hidden>
<div>All E-Mails to Notify for this tracker, space separated: </div>
<div class="input-group mb-2 mr-sm-2">
<div class="input-group-prepend">
<div class="input-group-text"><i class="fas fa-at"></i></div>
<input id="mails" name="mails" class="form-control" placeholder="E-Mails Notification (optional, space separated)" type="text"
value="{% for mail in tracker_metadata['mails'] %}{{mail}} {% endfor %}">
<button class="btn btn-info">
<i class="fas fa-pencil-alt"></i> Edit Email Notification
<a href="{{ url_for('hunter.delete_tracker') }}?uuid={{tracker_metadata['uuid']}}" class="float-right" style="font-size: 15px">
<button class='btn btn-danger'><i class="fas fa-trash-alt"></i>
<div id="graphline" class="text-center"></div>
<div class="card mb-5 mt-1">
<div class="card-body">
<div class="row mb-3">
<div class="col-md-6">
<div class="input-group" id="date-range-from">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{{ tracker_metadata['date_from'] }}" name="date_from" autocomplete="off">
<div class="col-md-6">
<div class="input-group" id="date-range-to">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{{ tracker_metadata['date_to'] }}" name="date_to" autocomplete="off">
<button class="btn btn-info" type="button" id="button-search-tags" onclick="getItems();">
<i class="fas fa-search"></i> Search Tracked Items
{%if tracker_metadata['items']%}
<div class="mt-4">
<table class="table table-bordered table-hover" id="myTable_">
<thead class="thead-dark">
<th>Item Id</th>
{% for item in tracker_metadata['items'] %}
<a class="text-secondary" target="_blank" href="{{ url_for('showsavedpastes.showsavedpaste') }}?paste={{item['id']}}">
<div style="line-height:0.9;">{{ item['id'] }}</div>
<div class="mb-2">
{% for tag in item['tags'] %}
<a href="{{ url_for('Tags.Tags_page') }}?ltags={{ tag }}">
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }} pull-left">{{ tag }}</span>
{% endfor %}
{% endfor %}
{% endif %}
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
return '';
setValue: function(s,s1,s2){
separator : ' to ',
getValue: function(){
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
return '';
setValue: function(s,s1,s2){
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
"iDisplayLength": 10,
"order": [[ 0, "desc" ]]
sparkline("sparkline", {{ tracker_metadata['sparkline'] }}, {});
let div_width = $("#graphline").width();
$.getJSON( "{{ url_for('hunter.get_json_tracker_stats') }}?uuid={{ tracker_metadata['uuid'] }}{%if tracker_metadata['date_from']%}&date_from={{ tracker_metadata['date_from'] }}{%endif%}{%if tracker_metadata['date_to']%}&date_to={{ tracker_metadata['date_to'] }}{%endif%}",
function( data ) {multilines_group("graphline", data, {"width": div_width});}
function toggle_sidebar(){
function edit_tags(){
function edit_mails(){
function getItems() {
var date_from = $('#date-range-from-input').val();
var date_to =$('#date-range-to-input').val();
window.location.replace("{{ url_for('hunter.show_tracker') }}?uuid={{ tracker_metadata['uuid'] }}&date_from="+date_from+"&date_to="+date_to);

View File

@ -0,0 +1,206 @@
<!DOCTYPE html>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tracker Management</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/plugins/d3/sparklines.js')}}"></script>
.btn-link {
color: #000000
cursor: pointer;
.lb-md {
font-size: 16px;
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'hunter/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card my-3">
<div class="card-header">
<h5 class="card-title">Your {{filter_type}} Trackers</h5>
<div class="card-body">
<table id="table_user_trackers" class="table table-striped table-bordered">
<thead class="bg-dark text-white">
<th>First seen</th>
<th>Last seen</th>
<th>Email notification</th>
<tbody style="font-size: 15px;">
{% for dict_uuid in user_term %}
<span><a target="_blank" href="{{ url_for('hunter.show_tracker') }}?uuid={{ dict_uuid['uuid'] }}">{{dict_uuid['term']}}</a></span>
{% for tag in dict_uuid['tags'] %}
<a href="{{ url_for('Tags.Tags_page') }}?ltags={{ tag }}">
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }} pull-left">{{ tag }}</span>
{% endfor %}
{% if dict_uuid['first_seen'] %}
{% endif %}
{% if dict_uuid['last_seen'] %}
{% endif %}
{% for mail in dict_uuid['mails'] %}
{{ mail }}<br>
{% endfor %}
<td id="sparklines_{{ dict_uuid['uuid'] }}" style="text-align:center;"></td>
{% endfor %}
<div class="card my-3">
<div class="card-header">
<h5 class="card-title">Global {{filter_type}} Trackers</h5>
<div class="card-body">
<table id="table_global_trackers" class="table table-striped table-bordered">
<thead class="bg-dark text-white">
<th>First seen</th>
<th>Last seen</th>
<th>Email notification</th>
<tbody style="font-size: 15px;">
{% for dict_uuid in global_term %}
<span><a target="_blank" href="{{ url_for('hunter.show_tracker') }}?uuid={{ dict_uuid['uuid'] }}">{{dict_uuid['term']}}</a></span>
{% for tag in dict_uuid['tags'] %}
<a href="{{ url_for('Tags.Tags_page') }}?ltags={{ tag }}">
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
{% endfor %}
{% if dict_uuid['first_seen'] %}
{% endif %}
{% if dict_uuid['last_seen'] %}
{% endif %}
{% for mail in dict_uuid['mails'] %}
{{ mail }}<br>
{% endfor %}
<td id="sparklines_{{ dict_uuid['uuid'] }}" style="text-align:center;"></td>
{% endfor %}
<a class="btn btn-info my-4" href="{{url_for('hunter.add_tracked_menu')}}">
<i class="fas fa-plus-circle ml-auto"></i>
Create New Tracker
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
"iDisplayLength": 10,
"order": [[ 0, "desc" ]]
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
"iDisplayLength": 10,
"order": [[ 0, "desc" ]]
{% for dict_uuid in user_term %}
sparkline("sparklines_{{ dict_uuid['uuid'] }}", {{ dict_uuid['sparkline'] }}, {height: 40});
{% endfor %}
{% for dict_uuid in global_term %}
sparkline("sparklines_{{ dict_uuid['uuid'] }}", {{ dict_uuid['sparkline'] }}, {height: 40});
{% endfor %}
function toggle_sidebar(){

View File

@ -19,6 +19,7 @@ import Pgp
import Item
import Paste
import Tag
import Term
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response
from flask_login import login_required
@ -57,8 +58,11 @@ def verify_token(token):
return False
def get_user_from_token(token):
    """Return the value stored for ``token`` in the 'user:tokens' hash of r_serv_db."""
    user_id = r_serv_db.hget('user:tokens', token)
    return user_id
def verify_user_role(role, token):
user_id = r_serv_db.hget('user:tokens', token)
user_id = get_user_from_token(token)
if user_id:
if is_in_role(user_id, role):
return True
@ -310,6 +314,37 @@ def get_all_tags():
res = {'tags': Tag.get_all_tags()}
return Response(json.dumps(res, indent=2, sort_keys=True), mimetype='application/json'), 200
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # TRACKER # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@restApi.route("api/v1/add/tracker/term", methods=['POST'])
def add_tracker_term():
    """Create a tracker from the request's JSON body.

    The body and the user resolved from the auth header token go to
    ``Term.parse_json_term_to_add``; its result is returned as a JSON body
    with the status code it reports.
    """
    payload = request.get_json()
    requester = get_user_from_token(get_auth_from_header())
    outcome = Term.parse_json_term_to_add(payload, requester)
    body = json.dumps(outcome[0], indent=2, sort_keys=True)
    return Response(body, mimetype='application/json'), outcome[1]
@restApi.route("api/v1/delete/tracker/term", methods=['DELETE'])
def delete_tracker_term():
    """Delete the tracker described by the request's JSON body.

    The body and the user resolved from the auth header token go to
    ``Term.parse_tracked_term_to_delete``; its result is returned as a JSON
    body with the status code it reports.
    """
    payload = request.get_json()
    requester = get_user_from_token(get_auth_from_header())
    outcome = Term.parse_tracked_term_to_delete(payload, requester)
    body = json.dumps(outcome[0], indent=2, sort_keys=True)
    return Response(body, mimetype='application/json'), outcome[1]
@restApi.route("api/v1/get/tracker/term/item", methods=['POST'])
def get_tracker_term_item():
    """Return the items of the tracker described by the request's JSON body.

    The body and the user resolved from the auth header token go to
    ``Term.parse_get_tracker_term_item``; its result is returned as a JSON
    body with the status code it reports.
    """
    payload = request.get_json()
    requester = get_user_from_token(get_auth_from_header())
    outcome = Term.parse_get_tracker_term_item(payload, requester)
    body = json.dumps(outcome[0], indent=2, sort_keys=True)
    return Response(body, mimetype='application/json'), outcome[1]
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # CRYPTOCURRENCY # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@ -420,7 +455,6 @@ def get_item_cryptocurrency_bitcoin():
return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

View File

@ -6,20 +6,25 @@
note: The matching of credential against supplied credential is done using Levenshtein distance
import json
import redis
import datetime
import calendar
import flask
from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect
from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect, Response
from Role_Manager import login_admin, login_analyst
from flask_login import login_required
from flask_login import login_required, current_user
import re
import Paste
from pprint import pprint
import Levenshtein
# ---------------------------------------------------------------
import Paste
import Term
# ============ VARIABLES ============
import Flask_config
@ -146,338 +151,6 @@ def save_tag_to_auto_push(list_tag):
# ============ ROUTES ============
def terms_management():
per_paste = request.args.get('per_paste')
if per_paste == "1" or per_paste is None:
per_paste_text = "per_paste_"
per_paste = 1
per_paste_text = ""
per_paste = 0
today =
today = today.replace(hour=0, minute=0, second=0, microsecond=0)
today_timestamp = calendar.timegm(today.timetuple())
# Map tracking if notifications are enabled for a specific term
notificationEnabledDict = {}
# Maps a specific term to the associated email addresses
notificationEMailTermMapping = {}
notificationTagsTermMapping = {}
trackReg_list = []
trackReg_list_values = []
trackReg_list_num_of_paste = []
for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name):
notificationEMailTermMapping[tracked_regex] = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex)
notificationTagsTermMapping[tracked_regex] = r_serv_term.smembers(TrackedTermsNotificationTagsPrefix_Name + tracked_regex)
if tracked_regex not in notificationEnabledDict:
notificationEnabledDict[tracked_regex] = False
value_range = Term_getValueOverRange(tracked_regex, today_timestamp, [1, 7, 31], per_paste=per_paste_text)
term_date = r_serv_term.hget(TrackedRegexDate_Name, tracked_regex)
set_paste_name = "regex_" + tracked_regex
term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
if tracked_regex in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
notificationEnabledDict[tracked_regex] = True
trackSet_list = []
trackSet_list_values = []
trackSet_list_num_of_paste = []
for tracked_set in r_serv_term.smembers(TrackedSetSet_Name):
tracked_set = tracked_set
notificationEMailTermMapping[tracked_set] = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set)
notificationTagsTermMapping[tracked_set] = r_serv_term.smembers(TrackedTermsNotificationTagsPrefix_Name + tracked_set)
if tracked_set not in notificationEnabledDict:
notificationEnabledDict[tracked_set] = False
value_range = Term_getValueOverRange(tracked_set, today_timestamp, [1, 7, 31], per_paste=per_paste_text)
term_date = r_serv_term.hget(TrackedSetDate_Name, tracked_set)
set_paste_name = "set_" + tracked_set
term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
if tracked_set in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
notificationEnabledDict[tracked_set] = True
#Tracked terms
track_list = []
track_list_values = []
track_list_num_of_paste = []
for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name):
notificationEMailTermMapping[tracked_term] = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term)
notificationTagsTermMapping[tracked_term] = r_serv_term.smembers(TrackedTermsNotificationTagsPrefix_Name + tracked_term)
if tracked_term not in notificationEnabledDict:
notificationEnabledDict[tracked_term] = False
value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31], per_paste=per_paste_text)
term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term)
set_paste_name = "tracked_" + tracked_term
track_list_num_of_paste.append( r_serv_term.scard(set_paste_name) )
term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
if tracked_term in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
notificationEnabledDict[tracked_term] = True
#blacklist terms
black_list = []
for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name):
term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term)
term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
black_list.append([blacked_term, term_date])
return render_template("terms_management.html",
black_list=black_list, track_list=track_list, trackReg_list=trackReg_list, trackSet_list=trackSet_list,
track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste,
trackReg_list_values=trackReg_list_values, trackReg_list_num_of_paste=trackReg_list_num_of_paste,
trackSet_list_values=trackSet_list_values, trackSet_list_num_of_paste=trackSet_list_num_of_paste,
per_paste=per_paste, notificationEnabledDict=notificationEnabledDict, bootstrap_label=bootstrap_label,
notificationEMailTermMapping=notificationEMailTermMapping, notificationTagsTermMapping=notificationTagsTermMapping)
def terms_management_query_paste():
    """Return, as JSON, a description of every paste recorded for a term.

    The term comes from the ``term`` query argument. The characters that
    wrap it select which Redis set is read:

    * wrapped in slashes      -> ``"regex_" + term``
    * wrapped in backslashes  -> ``"set_" + term``
    * anything else           -> ``"tracked_" + term``

    Each member of that set is used as a paste path and turned into a dict
    with the keys ``path``, ``date`` (``YYYY/MM/DD``), ``source``, ``size``,
    ``mime``, ``lineinfo`` and ``content`` (sliced with ``[0:400]``).

    An empty JSON list is returned when no ``term`` argument is supplied.
    """
    term = request.args.get('term')
    paste_info = []

    if term is None:
        # Without a term there is no set to read; avoid calling str methods
        # on None.
        return jsonify(paste_info)

    # Exactly one prefix applies per term; the plain-term prefix is the
    # fallback and must not override the regex/set cases.
    if term.startswith('/') and term.endswith('/'):
        set_paste_name = "regex_" + term
    elif term.startswith('\\') and term.endswith('\\'):
        set_paste_name = "set_" + term
    else:
        set_paste_name = "tracked_" + term
    track_list_path = r_serv_term.smembers(set_paste_name)

    for path in track_list_path:
        paste = Paste.Paste(path)
        p_date = str(paste._get_p_date())
        # Reformat the first eight characters as YYYY/MM/DD.
        p_date = p_date[0:4] + '/' + p_date[4:6] + '/' + p_date[6:8]
        p_source = paste.p_source
        p_size = paste.p_size
        p_mime = paste.p_mime
        p_lineinfo = paste.get_lines_info()
        p_content = paste.get_p_content()
        # NOTE(review): a content value of 0 is passed through unsliced --
        # presumably a "could not read" sentinel; confirm against Paste.
        if p_content != 0:
            p_content = p_content[0:400]
        paste_info.append({
            "path": path,
            "date": p_date,
            "source": p_source,
            "size": p_size,
            "mime": p_mime,
            "lineinfo": p_lineinfo,
            "content": p_content,
        })

    return jsonify(paste_info)
def terms_management_query():
    """Return, as JSON, the result of ``Term_getValueOverRange`` for a term.

    Reads the ``term`` and ``section`` query arguments, anchors the lookup at
    today's midnight and asks ``Term_getValueOverRange`` for the ranges
    ``[1, 7, 31]``.

    The date the term was recorded is looked up in the hash that matches
    ``section`` (``followTerm`` or ``blacklistTerm``); any other section is
    treated as "no date recorded" instead of raising.
    """
    TrackedTermsDate_Name = "TrackedTermDate"
    BlackListTermsDate_Name = "BlackListTermDate"
    term = request.args.get('term')
    section = request.args.get('section')

    # NOTE(review): local wall-clock time is assumed here; confirm whether
    # UTC was intended, since calendar.timegm() reads the tuple as UTC.
    today = datetime.datetime.now()
    today = today.replace(hour=0, minute=0, second=0, microsecond=0)
    today_timestamp = calendar.timegm(today.timetuple())
    value_range = Term_getValueOverRange(term, today_timestamp, [1, 7, 31])

    if section == "followTerm":
        term_date = r_serv_term.hget(TrackedTermsDate_Name, term)
    elif section == "blacklistTerm":
        term_date = r_serv_term.hget(BlackListTermsDate_Name, term)
    else:
        # Unknown or missing section: there is no hash to consult.
        term_date = None

    # NOTE(review): term_date is resolved but is not part of the response
    # below; confirm whether it was meant to be added to value_range.
    term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
    return jsonify(value_range)
@terms.route("/terms_management_action/", methods=['GET'])
def terms_management_action():
# Apply an add / toggle-notification / delete action to a tracked or
# blacklisted term, driven entirely by GET query arguments:
#   section        - "followTerm" or "blacklistTerm"
#   action         - "add", "toggleEMailNotification", or anything else
#                    (handled by the "#del action" code below)
#   term           - the term; /.../ marks a regex, \...\ marks a set
#   emailAddresses - whitespace-separated notification addresses
#   tags           - whitespace-separated tags
# Returns the literal string "None" when a required argument is missing,
# otherwise a JSON object echoing section, action and term.
#
# NOTE(review): this function appears to have lost several short lines
# (a right-hand side, an `if` body and a number of `else:` branches).
# Each suspected gap is flagged where it occurs; restore before running.

# NOTE(review): the right-hand side of this assignment is missing. The
# .replace(microsecond=0) call on the next line suggests a datetime value.
today =
today = today.replace(microsecond=0)
# Epoch seconds stored as the "date added" of the term.
today_timestamp = calendar.timegm(today.timetuple())
section = request.args.get('section')
action = request.args.get('action')
term = request.args.get('term')
notificationEmailsParam = request.args.get('emailAddresses')
input_tags = request.args.get('tags')
# action, term and emailAddresses are mandatory; section and tags are not
# checked here.
if action is None or term is None or notificationEmailsParam is None:
return "None"
if section == "followTerm":
if action == "add":
# Make a list of all passed email addresses
notificationEmails = notificationEmailsParam.split()
validNotificationEmails = []
# check for valid email addresses
for email in notificationEmails:
# Really basic validation:
# has exactly one @ sign, and at least one . in the part after the @
# NOTE(review): this `if` has no body here. The loops below iterate
# validNotificationEmails, so a matching address was presumably appended
# to that list -- confirm.
if re.match(r"[^@]+@[^@]+\.[^@]+", email):
# create tags list
# NOTE(review): input_tags is not None-checked above; .split() raises if
# the `tags` argument is absent.
list_tags = input_tags.split()
# check if regex/set or simple term
# Regex term: wrapped in slashes, stored exactly as given.
if term.startswith('/') and term.endswith('/'):
r_serv_term.sadd(TrackedRegexSet_Name, term)
r_serv_term.hset(TrackedRegexDate_Name, term, today_timestamp)
# add all valid emails to the set
for email in validNotificationEmails:
r_serv_term.sadd(TrackedTermsNotificationEmailsPrefix_Name + term, email)
# enable notifications by default
r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, term)
# add tags list
for tag in list_tags:
r_serv_term.sadd(TrackedTermsNotificationTagsPrefix_Name + term, tag)
# Set term: wrapped in backslashes.
elif term.startswith('\\') and term.endswith('\\'):
# Strip the surrounding backslashes.
tab_term = term[1:-1]
# Look for a bracketed 1-3 digit number inside the set text.
perc_finder = re.compile("\[[0-9]{1,3}\]").search(tab_term)
if perc_finder is not None:
# NOTE(review): the expression being sliced is missing; [1:-1] presumably
# strips the brackets from the text matched by perc_finder -- confirm.
match_percent =[1:-1]
set_to_add = term
# NOTE(review): as written this overwrites set_to_add unconditionally. An
# `else:` (set without a bracketed number, with a default match_percent)
# presumably preceded it -- confirm, including the default value.
set_to_add = "\\" + tab_term[:-1] + ", [{}]]\\".format(match_percent)
r_serv_term.sadd(TrackedSetSet_Name, set_to_add)
r_serv_term.hset(TrackedSetDate_Name, set_to_add, today_timestamp)
# add all valid emails to the set
for email in validNotificationEmails:
r_serv_term.sadd(TrackedTermsNotificationEmailsPrefix_Name + set_to_add, email)
# enable notifications by default
r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, set_to_add)
# add tags list
for tag in list_tags:
r_serv_term.sadd(TrackedTermsNotificationTagsPrefix_Name + set_to_add, tag)
#simple term
# NOTE(review): no `else:` is visible before this section, so it is not
# separated from the regex/set branches above -- confirm it was the
# fallback branch. Simple terms are stored lower-cased.
r_serv_term.sadd(TrackedTermsSet_Name, term.lower())
r_serv_term.hset(TrackedTermsDate_Name, term.lower(), today_timestamp)
# add all valid emails to the set
for email in validNotificationEmails:
r_serv_term.sadd(TrackedTermsNotificationEmailsPrefix_Name + term.lower(), email)
# enable notifications by default
r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, term.lower())
# add tags list
for tag in list_tags:
r_serv_term.sadd(TrackedTermsNotificationTagsPrefix_Name + term.lower(), tag)
elif action == "toggleEMailNotification":
# get the current state
# The membership test uses `term` as given, while the srem/sadd below use
# term.lower().
if term in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
# remove it
r_serv_term.srem(TrackedTermsNotificationEnabled_Name, term.lower())
# add it
# NOTE(review): no `else:` is visible between the srem above and this
# sadd; as written the term is always (re-)added -- confirm.
r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, term.lower())
#del action
# NOTE(review): no `else:` is visible before this section; it presumably
# handled every action other than "add"/"toggleEMailNotification".
if term.startswith('/') and term.endswith('/'):
r_serv_term.srem(TrackedRegexSet_Name, term)
r_serv_term.hdel(TrackedRegexDate_Name, term)
elif term.startswith('\\') and term.endswith('\\'):
r_serv_term.srem(TrackedSetSet_Name, term)
r_serv_term.hdel(TrackedSetDate_Name, term)
# NOTE(review): presumably the `else:` (simple term) branch -- confirm.
r_serv_term.srem(TrackedTermsSet_Name, term.lower())
r_serv_term.hdel(TrackedTermsDate_Name, term.lower())
# delete the associated notification emails too
# These keys are built from `term` as given (not lower-cased).
r_serv_term.delete(TrackedTermsNotificationEmailsPrefix_Name + term)
# delete the associated tags set
r_serv_term.delete(TrackedTermsNotificationTagsPrefix_Name + term)
elif section == "blacklistTerm":
if action == "add":
# The set member is lower-cased but the date hash field uses `term` as
# given.
r_serv_term.sadd(BlackListTermsSet_Name, term.lower())
r_serv_term.hset(BlackListTermsDate_Name, term, today_timestamp)
# NOTE(review): no `else:` is visible before this srem (non-"add" action)
# nor before the return below (presumably the unknown-section case).
r_serv_term.srem(BlackListTermsSet_Name, term.lower())
return "None"
# Echo the request back so the caller can tell what was processed.
to_return = {}
to_return["section"] = section
to_return["action"] = action
to_return["term"] = term
return jsonify(to_return)
@terms.route("/terms_management/delete_terms_tags", methods=['POST'])
def delete_terms_tags():
    """Remove the submitted tags from a term's notification tag set.

    Reads ``term`` and the ``tags_to_delete`` list from the POSTed form,
    removes each listed tag from the Redis set keyed by
    ``TrackedTermsNotificationTagsPrefix_Name + term`` and redirects to the
    terms management page. Answers ``('None args', 400)`` when an argument
    is ``None``.
    """
    tracked_term = request.form.get('term')
    doomed_tags = request.form.getlist('tags_to_delete')

    # Guard clause: bail out before touching Redis.
    if tracked_term is None or doomed_tags is None:
        return 'None args', 400

    tags_key = TrackedTermsNotificationTagsPrefix_Name + tracked_term
    for doomed_tag in doomed_tags:
        r_serv_term.srem(tags_key, doomed_tag)
    return redirect(url_for('terms.terms_management'))
@terms.route("/terms_management/delete_terms_email", methods=['GET'])
def delete_terms_email():
    """Remove one notification address from a term's email set.

    Reads the ``term`` and ``email`` query arguments, removes the address
    from the Redis set keyed by
    ``TrackedTermsNotificationEmailsPrefix_Name + term`` and redirects to the
    terms management page. Answers ``('None args', 400)`` when either
    argument is missing.
    """
    tracked_term = request.args.get('term')
    address = request.args.get('email')

    # Guard clause: both arguments are required.
    if tracked_term is None or address is None:
        return 'None args', 400

    emails_key = TrackedTermsNotificationEmailsPrefix_Name + tracked_term
    r_serv_term.srem(emails_key, address)
    return redirect(url_for('terms.terms_management'))

View File

@ -1,7 +1,6 @@
<li id='page-termsfrequency'><a class="dropdown-toggle" data-toggle="dropdown" href="{{ url_for('terms.terms_management') }}"><i class="fa fa-eye"></i> Terms frequency
<li id='page-termsfrequency'><a class="dropdown-toggle" data-toggle="dropdown" href="{{ url_for('terms.credentials_tracker') }}"><i class="fa fa-eye"></i> Terms frequency
<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="{{ url_for('terms.terms_management') }}"><i class="fa fa-gear "> </i> Terms managements</a></li>
<li><a href="{{ url_for('terms.credentials_tracker') }}"><i class="glyphicon glyphicon-screenshot"> </i> Credentials seeker</a></li>
<li><a href="{{ url_for('terms.terms_plot_top') }}"><i class="glyphicon glyphicon-fire"> </i> Terms plot top</a></li>
<li><a href="{{ url_for('terms.terms_plot_tool') }}"><i class="fa fa-wrench"> </i> Terms plot tool</a></li>

View File

@ -0,0 +1,48 @@
/**
 * Draw a small line chart ("sparkline") inside an existing element.
 *
 * Relies on the page-level globals `d3` (scaleLinear / line API) and
 * jQuery (`$`).
 *
 * @param {string} container_id - id of the target element, without the "#".
 * @param {number[]} data - values to plot; the array index is the x value.
 * @param {Object} [options] - overrides, deep-merged over the defaults:
 *   `style.stroke`, `style.strokeWidth`, `margin.{top,right,bottom,left}`,
 *   `width`, `height`.
 * @returns the d3 selection of the appended <path> element.
 */
const sparkline = (container_id, data, options) => {

  const defaults = {
    style: {
      stroke: "rgb(0, 0, 0)",
      strokeWidth: 2
    },
    margin: {top:3, right:3, bottom:3, left:3},
    width: 100,
    height: 60
  };

  // Deep merge so a caller may override a single nested key.
  options = $.extend(true, defaults, options);

  // Drawable area is the outer size minus the margins on each side.
  let width_spark = options.width - options.margin.left - options.margin.right;
  let height_spark = options.height - options.margin.top - options.margin.bottom;

  let maxX = data.length;
  let maxY = d3.max(data, function(d) { return d } );

  let x = d3.scaleLinear()
      .range([0, width_spark])
      .domain([0, maxX]);

  // Range is inverted because SVG y grows downwards.
  let y = d3.scaleLinear()
      .range([height_spark, 0])
      .domain([0, maxY]);

  let line = d3.line()
      .x(function(d, i) {return x(i)})
      .y(function(d) {return y(d)});

  let res = d3.select( "#"+container_id ).append('svg')
      .attr('width', options.width)
      .attr('height', options.height)
      .append('g')
      .attr("transform", "translate("+options.margin.left+","+options.margin.top+")")
      .append('path')
      .datum(data)
      .attr('d', line)
      .style("fill", "none")
      .style("stroke", options.style.stroke)
      .style("stroke-width", options.style.strokeWidth);

  return res
}

View File

@ -0,0 +1,42 @@
<!-- Tracker sidebar: a toggle button plus links to the tracker list pages. -->
<div class="col-12 col-lg-2 p-0 bg-light border-right" id="side_menu">

  <button type="button" class="btn btn-outline-secondary mt-1 ml-3" onclick="toggle_sidebar()">
    <i class="fas fa-align-left"></i>
    <span>Toggle Sidebar</span>
  </button>

  <nav class="navbar navbar-expand navbar-light bg-light flex-md-column flex-row align-items-start py-2" id="nav_menu">
    <h5 class="d-flex text-muted w-100">
      <span>Trackers </span>
      <!-- Shortcut to the "add tracker" page -->
      <a class="ml-auto" href="{{url_for('hunter.add_tracked_menu')}}">
        <i class="fas fa-plus-circle ml-auto"></i>
      </a>
    </h5>
    <ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
      <li class="nav-item">
        <a class="nav-link" href="{{url_for('hunter.tracked_menu')}}" id="nav_tracker_">
          <i class="fas fa-ruler-combined"></i>
          <span>All Trackers</span>
        </a>
      </li>
      <li class="nav-item">
        <a class="nav-link" href="{{url_for('hunter.tracked_menu_word')}}" id="nav_tracker_word">
          <i class="fas fa-font"></i>
          <span>Tracked Words</span>
        </a>
      </li>
      <li class="nav-item">
        <a class="nav-link" href="{{url_for('hunter.tracked_menu_set')}}" id="nav_tracker_set">
          <i class="fas fa-layer-group"></i>
          <span>Tracked Set</span>
        </a>
      </li>
      <li class="nav-item">
        <a class="nav-link" href="{{url_for('hunter.tracked_menu_regex')}}" id="nav_tracker_regex">
          <i class="fas fa-ruler"></i>
          <span>Tracked Regex</span>
        </a>
      </li>
    </ul>
  </nav>

</div>

View File

@ -19,7 +19,7 @@
<a class="nav-link" id="page-Browse-Items" href="{{ url_for('Tags.Tags_page') }}" aria-disabled="true"><i class="fas fa-tag"></i> Browse Items</a>
<li class="nav-item mr-3">
<a class="nav-link" href="{{ url_for('terms.terms_management') }}" aria-disabled="true"><i class="fas fa-crosshairs"></i> Leaks Hunter</a>
<a class="nav-link" id="page-Tracker" href="{{ url_for('hunter.tracked_menu') }}" aria-disabled="true"><i class="fas fa-crosshairs"></i> Leaks Hunter</a>
<li class="nav-item mr-3">
<a class="nav-link" id="page-Crawler" href="{{ url_for('hiddenServices.dashboard') }}" tabindex="-1" aria-disabled="true"><i class="fas fa-spider"></i> Crawlers</a>