diff --git a/bin/ApiKey.py b/bin/ApiKey.py
index 7c12751e..8ce7e2b4 100755
--- a/bin/ApiKey.py
+++ b/bin/ApiKey.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
@@ -65,12 +65,6 @@ if __name__ == "__main__":
 
     publisher.info("ApiKey started")
 
-    # REDIS #
-    r_serv2 = redis.StrictRedis(
-        host=p.config.get("Redis_Cache", "host"),
-        port=p.config.getint("Redis_Cache", "port"),
-        db=p.config.getint("Redis_Cache", "db"))
-
     message = p.get_from_set()
 
     # TODO improve REGEX
diff --git a/bin/Attributes.py b/bin/Attributes.py
index 96471a79..a29f34b3 100755
--- a/bin/Attributes.py
+++ b/bin/Attributes.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
diff --git a/bin/Base64.py b/bin/Base64.py
index fbb8a356..c7700994 100755
--- a/bin/Base64.py
+++ b/bin/Base64.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 Base64 module
diff --git a/bin/Bitcoin.py b/bin/Bitcoin.py
index 890a2498..42468759 100755
--- a/bin/Bitcoin.py
+++ b/bin/Bitcoin.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 The Bitcoin Module
diff --git a/bin/Categ.py b/bin/Categ.py
index 019efc36..cf78f90f 100755
--- a/bin/Categ.py
+++ b/bin/Categ.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 The ZMQ_PubSub_Categ Module
diff --git a/bin/Credential.py b/bin/Credential.py
index 776f75a8..7d0c3bdf 100755
--- a/bin/Credential.py
+++ b/bin/Credential.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
@@ -55,7 +55,8 @@ if __name__ == "__main__":
     server_cred = redis.StrictRedis(
         host=p.config.get("Redis_Level_DB_TermCred", "host"),
         port=p.config.get("Redis_Level_DB_TermCred", "port"),
-        db=p.config.get("Redis_Level_DB_TermCred", "db"))
+        db=p.config.get("Redis_Level_DB_TermCred", "db"),
+        decode_responses=True)
 
     criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert")
     minTopPassList = p.config.getint("Credential", "minTopPassList")
@@ -68,8 +69,8 @@ if __name__ == "__main__":
         message = p.get_from_set()
         if message is None:
             publisher.debug("Script Credential is Idling 10s")
-            print('sleeping 10s')
-            time.sleep(1)
+            #print('sleeping 10s')
+            time.sleep(10)
             continue
 
         filepath, count = message.split(' ')
@@ -109,7 +110,7 @@ if __name__ == "__main__":
             site_occurence = re.findall(regex_site_for_stats, content)
             for site in site_occurence:
                 site_domain = site[1:-1]
-                if site_domain.encode('utf8') in creds_sites.keys():
+                if site_domain in creds_sites.keys():
                     creds_sites[site_domain] += 1
                 else:
                     creds_sites[site_domain] = 1
@@ -123,10 +124,6 @@ if __name__ == "__main__":
                     creds_sites[domain] = 1
 
             for site, num in creds_sites.items(): # Send for each different site to moduleStats
-                try:
-                    site = site.decode('utf8')
-                except:
-                    pass
 
                 mssg = 'credential;{};{};{}'.format(num, site, paste.p_date)
                 print(mssg)
diff --git a/bin/CreditCards.py b/bin/CreditCards.py
index 4d1f4d1f..a7441807 100755
--- a/bin/CreditCards.py
+++ b/bin/CreditCards.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
diff --git a/bin/Curve.py b/bin/Curve.py
index 5691fcee..16e13536 100755
--- a/bin/Curve.py
+++ b/bin/Curve.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
@@ -49,7 +49,7 @@ top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
 top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month]
 
 def check_if_tracked_term(term, path):
-    if term.encode('utf8') in server_term.smembers(TrackedTermsSet_Name):
+    if term in server_term.smembers(TrackedTermsSet_Name):
         #add_paste to tracked_word_set
         set_name = "tracked_" + term
         server_term.sadd(set_name, path)
@@ -84,12 +84,14 @@ if __name__ == "__main__":
     r_serv1 = redis.StrictRedis(
         host=p.config.get("Redis_Level_DB_Curve", "host"),
         port=p.config.get("Redis_Level_DB_Curve", "port"),
-        db=p.config.get("Redis_Level_DB_Curve", "db"))
+        db=p.config.get("Redis_Level_DB_Curve", "db"),
+        decode_responses=True)
 
     server_term = redis.StrictRedis(
         host=p.config.get("Redis_Level_DB_TermFreq", "host"),
         port=p.config.get("Redis_Level_DB_TermFreq", "port"),
-        db=p.config.get("Redis_Level_DB_TermFreq", "db"))
+        db=p.config.get("Redis_Level_DB_TermFreq", "db"),
+        decode_responses=True)
 
     # FUNCTIONS #
     publisher.info("Script Curve started")
@@ -132,7 +134,7 @@ if __name__ == "__main__":
                 curr_word_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), low_word, int(1)))
 
                 # Add in set only if term is not in the blacklist
-                if low_word.encode('utf8') not in server_term.smembers(BlackListTermsSet_Name):
+                if low_word not in server_term.smembers(BlackListTermsSet_Name):
                     #consider the num of occurence of this term
                     server_term.zincrby(curr_set, low_word, float(score))
                     #1 term per paste
diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py
index 1b37d21a..3ee5a9d5 100755
--- a/bin/CurveManageTopSets.py
+++ b/bin/CurveManageTopSets.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
@@ -139,7 +139,8 @@ if __name__ == '__main__':
     r_temp = redis.StrictRedis(
         host=cfg.get('RedisPubSub', 'host'),
         port=cfg.getint('RedisPubSub', 'port'),
-        db=cfg.getint('RedisPubSub', 'db'))
+        db=cfg.getint('RedisPubSub', 'db'),
+        decode_responses=True)
 
     timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
     value = str(timestamp) + ", " + "-"
@@ -149,7 +150,8 @@ if __name__ == '__main__':
     server_term = redis.StrictRedis(
         host=cfg.get("Redis_Level_DB_TermFreq", "host"),
         port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
-        db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
+        db=cfg.getint("Redis_Level_DB_TermFreq", "db"),
+        decode_responses=True)
 
     publisher.info("Script Curve_manage_top_set started")
diff --git a/bin/Cve.py b/bin/Cve.py
index f417d7a9..9ac4efc8 100755
--- a/bin/Cve.py
+++ b/bin/Cve.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 The CVE Module
diff --git a/bin/Dir.py b/bin/Dir.py
index 570d3dec..d76a7ad5 100755
--- a/bin/Dir.py
+++ b/bin/Dir.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 import argparse
@@ -36,7 +36,8 @@ def main():
 
     r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
                                port=cfg.getint("Redis_Queues", "port"),
-                               db=cfg.getint("Redis_Queues", "db"))
+                               db=cfg.getint("Redis_Queues", "db"),
+                               decode_responses=True)
 
     publisher.port = 6380
     publisher.channel = "Script"
diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py
index 53d4299d..aed87a55 100755
--- a/bin/DomClassifier.py
+++ b/bin/DomClassifier.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
diff --git a/bin/DumpValidOnion.py b/bin/DumpValidOnion.py
index 14efc0ba..b6f298d6 100755
--- a/bin/DumpValidOnion.py
+++ b/bin/DumpValidOnion.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 from pubsublogger import publisher
diff --git a/bin/Duplicates.py b/bin/Duplicates.py
index a85fa3e7..c89c5391 100755
--- a/bin/Duplicates.py
+++ b/bin/Duplicates.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
@@ -53,7 +53,8 @@ if __name__ == "__main__":
         for month in range(0, 13):
             dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
                 host=p.config.get("Redis_Level_DB", "host"), port=year,
-                db=month)
+                db=month,
+                decode_responses=True)
 
     # FUNCTIONS #
     publisher.info("Script duplicate started")
@@ -102,7 +103,7 @@ if __name__ == "__main__":
                 yearly_index = str(date_today.year)+'00'
                 r_serv0 = dico_redis[yearly_index]
                 r_serv0.incr("current_index")
-                index = (r_serv0.get("current_index")).decode('utf8') + str(PST.p_date)
+                index = r_serv0.get("current_index") + str(PST.p_date)
 
                 # Open selected dico range
                 opened_dico = []
@@ -116,7 +117,6 @@ if __name__ == "__main__":
             for curr_dico_name, curr_dico_redis in opened_dico:
                 for hash_type, paste_hash in paste_hashes.items():
                     for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
-                        dico_hash = dico_hash.decode('utf8')
 
                         try:
                             if hash_type == 'ssdeep':
@@ -134,11 +134,8 @@ if __name__ == "__main__":
                         # index of paste
                         index_current = r_serv_dico.get(dico_hash)
-                        index_current = index_current.decode('utf8')
                         paste_path = r_serv_dico.get(index_current)
-                        paste_path = paste_path.decode('utf8')
                         paste_date = r_serv_dico.get(index_current+'_date')
-                        paste_date = paste_date.decode('utf8')
                         paste_date = paste_date if paste_date != None else "No date available"
                         if paste_path != None:
                             if paste_path != PST.p_path:
diff --git a/bin/Duplicates_old.py b/bin/Duplicates_old.py
deleted file mode 100755
index 2ac66333..00000000
--- a/bin/Duplicates_old.py
+++ /dev/null
@@ -1,166 +0,0 @@
-#!/usr/bin/env python3.5
-# -*-coding:UTF-8 -*
-
-"""
-The Duplicate module
-====================
-
-This huge module is, in short term, checking duplicates.
-
-Requirements:
--------------
-
-
-"""
-import redis
-import os
-import time
-from packages import Paste
-from pubsublogger import publisher
-from pybloomfilter import BloomFilter
-
-from Helper import Process
-
-if __name__ == "__main__":
-    publisher.port = 6380
-    publisher.channel = "Script"
-
-    config_section = 'Duplicates'
-
-    p = Process(config_section)
-
-    # REDIS #
-    # DB OBJECT & HASHS ( DISK )
-    # FIXME increase flexibility
-    dico_redis = {}
-    for year in range(2013, 2017):
-        for month in range(0, 16):
-            dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
-                host=p.config.get("Redis_Level_DB", "host"), port=year,
-                db=month)
-            #print("dup: "+str(year)+str(month).zfill(2)+"\n")
-
-    # FUNCTIONS #
-    publisher.info("Script duplicate started")
-
-    set_limit = 100
-    bloompath = os.path.join(os.environ['AIL_HOME'],
-                             p.config.get("Directories", "bloomfilters"))
-
-    bloop_path_set = set()
-    while True:
-        try:
-            super_dico = {}
-            hash_dico = {}
-            dupl = []
-            nb_hash_current = 0
-
-            x = time.time()
-
-            message = p.get_from_set()
-            if message is not None:
-                path = message
-                PST = Paste.Paste(path)
-            else:
-                publisher.debug("Script Attribute is idling 10s")
-                time.sleep(10)
-                continue
-
-            PST._set_p_hash_kind("md5")
-
-            # Assignate the correct redis connexion
-            r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
-
-            # Creating the bloom filter name: bloomyyyymm
-            filebloompath = os.path.join(bloompath, 'bloom' + PST.p_date.year +
-                                         PST.p_date.month)
-            if os.path.exists(filebloompath):
-                bloom = BloomFilter.open(filebloompath)
-                bloop_path_set.add(filebloompath)
-            else:
-                bloom = BloomFilter(100000000, 0.01, filebloompath)
-                bloop_path_set.add(filebloompath)
-
-            # UNIQUE INDEX HASHS TABLE
-            r_serv0 = dico_redis["201600"]
-            r_serv0.incr("current_index")
-            index = r_serv0.get("current_index")+str(PST.p_date)
-            # HASHTABLES PER MONTH (because of r_serv1 changing db)
-            r_serv1.set(index, PST.p_path)
-            r_serv1.sadd("INDEX", index)
-
-            # For each bloom filter
-            opened_bloom = []
-            for bloo in bloop_path_set:
-                # Opening blooms
-                opened_bloom.append(BloomFilter.open(bloo))
-            # For each hash of the paste
-            for line_hash in PST._get_hash_lines(min=5, start=1, jump=0):
-                nb_hash_current += 1
-
-                # Adding the hash in Redis & limiting the set
-                if r_serv1.scard(line_hash) <= set_limit:
-                    r_serv1.sadd(line_hash, index)
-                    r_serv1.sadd("HASHS", line_hash)
-                # Adding the hash in the bloom of the month
-                bloom.add(line_hash)
-                # Go throught the Database of the bloom filter (of the month)
-                for bloo in opened_bloom:
-                    if line_hash in bloo:
-                        db = bloo.name[-6:]
-                        # Go throught the Database of the bloom filter (month)
-                        r_serv_bloom = dico_redis[db]
-
-                        # set of index paste: set([1,2,4,65])
-                        hash_current = r_serv_bloom.smembers(line_hash)
-                        # removing itself from the list
-                        hash_current = hash_current - set([index])
-
-                        # if the hash is present at least in 1 files
-                        # (already processed)
-                        if len(hash_current) != 0:
-                            hash_dico[line_hash] = hash_current
-
-                        # if there is data in this dictionnary
-                        if len(hash_dico) != 0:
-                            super_dico[index] = hash_dico
-
-            ###########################################################################
-
-            # if there is data in this dictionnary
-            if len(super_dico) != 0:
-                # current = current paste, phash_dico = {hash: set, ...}
-                occur_dico = {}
-                for current, phash_dico in super_dico.items():
-                    # phash = hash, pset = set([ pastes ...])
-                    for phash, pset in hash_dico.items():
-
-                        for p_fname in pset:
-                            occur_dico.setdefault(p_fname, 0)
-                            # Count how much hash is similar per file occuring
-                            # in the dictionnary
-                            if occur_dico[p_fname] >= 0:
-                                occur_dico[p_fname] = occur_dico[p_fname] + 1
-
-                for paste, count in occur_dico.items():
-                    percentage = round((count/float(nb_hash_current))*100, 2)
-                    if percentage >= 50:
-                        dupl.append((paste, percentage))
-                    else:
-                        print('percentage: ' + str(percentage))
-
-            # Creating the object attribute and save it.
-            to_print = 'Duplicate;{};{};{};'.format(
-                PST.p_source, PST.p_date, PST.p_name)
-            if dupl != []:
-                PST.__setattr__("p_duplicate", dupl)
-                PST.save_attribute_redis("p_duplicate", dupl)
-                publisher.info('{}Detected {}'.format(to_print, len(dupl)))
-                print('{}Detected {}'.format(to_print, len(dupl)))
-
-            y = time.time()
-
-            publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
-        except IOError:
-            print("CRC Checksum Failed on :", PST.p_path)
-            publisher.error('{}CRC Checksum Failed'.format(to_print))
diff --git a/bin/Global.py b/bin/Global.py
index 29893df8..6115a53f 100755
--- a/bin/Global.py
+++ b/bin/Global.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 The ZMQ_Feed_Q Module
diff --git a/bin/Helper.py b/bin/Helper.py
index 154d6ea8..289dd956 100755
--- a/bin/Helper.py
+++ b/bin/Helper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 Queue helper module
@@ -45,7 +45,8 @@ class PubSub(object):
             r = redis.StrictRedis(
                 host=self.config.get('RedisPubSub', 'host'),
                 port=self.config.get('RedisPubSub', 'port'),
-                db=self.config.get('RedisPubSub', 'db'))
+                db=self.config.get('RedisPubSub', 'db'),
+                decode_responses=True)
             self.subscribers = r.pubsub(ignore_subscribe_messages=True)
             self.subscribers.psubscribe(channel)
         elif conn_name.startswith('ZMQ'):
@@ -69,7 +70,8 @@ class PubSub(object):
         if conn_name.startswith('Redis'):
             r = redis.StrictRedis(host=self.config.get('RedisPubSub', 'host'),
                                   port=self.config.get('RedisPubSub', 'port'),
-                                  db=self.config.get('RedisPubSub', 'db'))
+                                  db=self.config.get('RedisPubSub', 'db'),
+                                  decode_responses=True)
             self.publishers['Redis'].append((r, channel))
         elif conn_name.startswith('ZMQ'):
             context = zmq.Context()
@@ -99,8 +101,7 @@ class PubSub(object):
             for sub in self.subscribers:
                 try:
                     msg = sub.recv(zmq.NOBLOCK)
-                    msg = msg.decode('utf8')
-                    yield msg.split(" ", 1)[1]
+                    yield msg.split(b" ", 1)[1]
                 except zmq.error.Again as e:
                     time.sleep(0.2)
                     pass
@@ -131,7 +132,8 @@ class Process(object):
         self.r_temp = redis.StrictRedis(
             host=self.config.get('RedisPubSub', 'host'),
             port=self.config.get('RedisPubSub', 'port'),
-            db=self.config.get('RedisPubSub', 'db'))
+            db=self.config.get('RedisPubSub', 'db'),
+            decode_responses=True)
 
         self.moduleNum = os.getpid()
 
@@ -152,11 +154,6 @@ class Process(object):
                                 int(self.r_temp.scard(in_set)))
             message = self.r_temp.spop(in_set)
 
-            try:
-                message = message.decode('utf8')
-            except AttributeError:
-                pass
-
             timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
             dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
 
@@ -216,11 +213,6 @@ class Process(object):
         while True:
             message = self.r_temp.spop(self.subscriber_name + 'out')
 
-            try:
-                message = message.decode('utf8')
-            except AttributeError:
-                pass
-
             if message is None:
                 time.sleep(1)
                 continue
diff --git a/bin/Indexer.py b/bin/Indexer.py
index a4a3e1f2..1d1ece4b 100755
--- a/bin/Indexer.py
+++ b/bin/Indexer.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
diff --git a/bin/Keys.py b/bin/Keys.py
index 565f874e..9f39cf50 100755
--- a/bin/Keys.py
+++ b/bin/Keys.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
diff --git a/bin/Lines.py b/bin/Lines.py
index 46f67f24..8c9f6827 100755
--- a/bin/Lines.py
+++ b/bin/Lines.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
diff --git a/bin/Mail.py b/bin/Mail.py
index 847f2f9d..abc112a6 100755
--- a/bin/Mail.py
+++ b/bin/Mail.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
@@ -34,7 +34,8 @@ if __name__ == "__main__":
     r_serv2 = redis.StrictRedis(
         host=p.config.get("Redis_Cache", "host"),
         port=p.config.getint("Redis_Cache", "port"),
-        db=p.config.getint("Redis_Cache", "db"))
+        db=p.config.getint("Redis_Cache", "db"),
+        decode_responses=True)
 
     # FUNCTIONS #
     publisher.info("Suscribed to channel mails_categ")
diff --git a/bin/Mixer.py b/bin/Mixer.py
index 7c30f989..309bb32a 100755
--- a/bin/Mixer.py
+++ b/bin/Mixer.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 The Mixer Module
@@ -65,7 +65,8 @@ if __name__ == '__main__':
     server = redis.StrictRedis(
         host=cfg.get("Redis_Mixer_Cache", "host"),
         port=cfg.getint("Redis_Mixer_Cache", "port"),
-        db=cfg.getint("Redis_Mixer_Cache", "db"))
+        db=cfg.getint("Redis_Mixer_Cache", "db"),
+        decode_responses=True)
 
     # LOGGING #
     publisher.info("Feed Script started to receive & publish.")
diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py
index 1d88cbc0..807cb87e 100755
--- a/bin/ModuleInformation.py
+++ b/bin/ModuleInformation.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
@@ -199,7 +199,8 @@ if __name__ == "__main__":
     server = redis.StrictRedis(
         host=cfg.get("Redis_Queues", "host"),
         port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
+        db=cfg.getint("Redis_Queues", "db"),
+        decode_responses=True)
 
     if args.clear == 1:
         clearRedisModuleInfo()
diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py
index 9c25eb75..05ff8dde 100755
--- a/bin/ModuleStats.py
+++ b/bin/ModuleStats.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 This module makes statistics for some modules and providers
@@ -52,7 +52,7 @@ def compute_most_posted(server, message):
         # Member set is a list of (value, score) pairs
         if int(member_set[0][1]) < keyword_total_sum:
             #remove min from set and add the new one
-            print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0].decode('utf8')+'('+str(member_set[0][1])+')')
+            print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
             server.zrem(redis_progression_name_set, member_set[0][0])
             server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
             print(redis_progression_name_set)
@@ -135,12 +135,14 @@ if __name__ == '__main__':
    r_serv_trend = redis.StrictRedis(
         host=p.config.get("Redis_Level_DB_Trending", "host"),
         port=p.config.get("Redis_Level_DB_Trending", "port"),
-        db=p.config.get("Redis_Level_DB_Trending", "db"))
+        db=p.config.get("Redis_Level_DB_Trending", "db"),
+        decode_responses=True)
 
     r_serv_pasteName = redis.StrictRedis(
         host=p.config.get("Redis_Paste_Name", "host"),
         port=p.config.get("Redis_Paste_Name", "port"),
-        db=p.config.get("Redis_Paste_Name", "db"))
+        db=p.config.get("Redis_Paste_Name", "db"),
+        decode_responses=True)
 
     # Endless loop getting messages from the input queue
     while True:
diff --git a/bin/ModulesInformationV2.py b/bin/ModulesInformationV2.py
index 05170a95..c39e51b2 100755
--- a/bin/ModulesInformationV2.py
+++ b/bin/ModulesInformationV2.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \
@@ -510,9 +510,8 @@ def clearRedisModuleInfo():
 
 def cleanRedis():
     for k in server.keys("MODULE_TYPE_*"):
-        moduleName = (k[12:].decode('utf8')).split('_')[0]
+        moduleName = k[12:].split('_')[0]
         for pid in server.smembers(k):
-            pid = pid.decode('utf8')
             flag_pid_valid = False
             proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
             try:
@@ -530,7 +529,7 @@ def cleanRedis():
                 #print flag_pid_valid, 'cleaning', pid, 'in', k
                 server.srem(k, pid)
                 inst_time = datetime.datetime.fromtimestamp(int(time.time()))
-                log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + (k).decode('utf8')], 0))
+                log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + k], 0))
 
             #Error due to resize, interrupted sys call
             except IOError as e:
@@ -607,19 +606,15 @@ def fetchQueueData():
     printarray_idle = []
     printarray_notrunning = []
     for queue, card in iter(server.hgetall("queues").items()):
-        queue = queue.decode('utf8')
-        card = card.decode('utf8')
         all_queue.add(queue)
         key = "MODULE_" + queue + "_"
         keySet = "MODULE_TYPE_" + queue
         array_module_type = []
 
         for moduleNum in server.smembers(keySet):
-            moduleNum = moduleNum.decode('utf8')
-            value = ( server.get(key + str(moduleNum)) ).decode('utf8')
+            value = server.get(key + str(moduleNum))
             complete_paste_path = ( server.get(key + str(moduleNum) + "_PATH") )
             if(complete_paste_path is not None):
-                complete_paste_path = complete_paste_path.decode('utf8')
                 COMPLETE_PASTE_PATH_PER_PID[moduleNum] = complete_paste_path
 
             if value is not None:
@@ -814,7 +809,8 @@ if __name__ == "__main__":
     server = redis.StrictRedis(
         host=cfg.get("Redis_Queues", "host"),
         port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
+        db=cfg.getint("Redis_Queues", "db"),
+        decode_responses=True)
 
     if args.clear == 1:
         clearRedisModuleInfo()
diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py
index 441be7e8..8c65bb3d 100755
--- a/bin/NotificationHelper.py
+++ b/bin/NotificationHelper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 import configparser
diff --git a/bin/Onion.py b/bin/Onion.py
index 5e35b8aa..77ed75fe 100755
--- a/bin/Onion.py
+++ b/bin/Onion.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 The ZMQ_Sub_Onion Module
@@ -94,7 +94,8 @@ if __name__ == "__main__":
     r_cache = redis.StrictRedis(
         host=p.config.get("Redis_Cache", "host"),
         port=p.config.getint("Redis_Cache", "port"),
-        db=p.config.getint("Redis_Cache", "db"))
+        db=p.config.getint("Redis_Cache", "db"),
+        decode_responses=True)
 
     # FUNCTIONS #
     publisher.info("Script subscribed to channel onion_categ")
diff --git a/bin/Phone.py b/bin/Phone.py
index e13d0f13..e3f0f908 100755
--- a/bin/Phone.py
+++ b/bin/Phone.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
diff --git a/bin/QueueIn.py b/bin/QueueIn.py
index 3f3325b7..4495e9c4 100755
--- a/bin/QueueIn.py
+++ b/bin/QueueIn.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 
diff --git a/bin/QueueOut.py b/bin/QueueOut.py
index 32a6a307..dbb36513 100755
--- a/bin/QueueOut.py
+++ b/bin/QueueOut.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 from pubsublogger import publisher
diff --git a/bin/Queues_Monitoring.py b/bin/Queues_Monitoring.py
index 5d120eb0..3f0462ab 100755
--- a/bin/Queues_Monitoring.py
+++ b/bin/Queues_Monitoring.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 import redis
@@ -30,7 +30,8 @@ def main():
     r_serv = redis.StrictRedis(
         host=cfg.get("Redis_Queues", "host"),
         port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
+        db=cfg.getint("Redis_Queues", "db"),
+        decode_responses=True)
 
     # LOGGING #
     publisher.port = 6380
diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py
index 12758219..5e867b27 100755
--- a/bin/RegexForTermsFrequency.py
+++ b/bin/RegexForTermsFrequency.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 This Module is used for term frequency.
@@ -56,7 +56,8 @@ if __name__ == "__main__":
     server_term = redis.StrictRedis(
         host=p.config.get("Redis_Level_DB_TermFreq", "host"),
         port=p.config.get("Redis_Level_DB_TermFreq", "port"),
-        db=p.config.get("Redis_Level_DB_TermFreq", "db"))
+        db=p.config.get("Redis_Level_DB_TermFreq", "db"),
+        decode_responses=True)
 
     # FUNCTIONS #
     publisher.info("RegexForTermsFrequency script started")
diff --git a/bin/Release.py b/bin/Release.py
index 68496e91..6e7a8277 100755
--- a/bin/Release.py
+++ b/bin/Release.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 import time
 from packages import Paste
diff --git a/bin/Repartition_graph.py b/bin/Repartition_graph.py
index 58480ffd..02c3adc3 100755
--- a/bin/Repartition_graph.py
+++ b/bin/Repartition_graph.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3.5
+#!/usr/bin/python3
 # -*-coding:UTF-8 -*
 
 import redis
@@ -35,7 +35,8 @@ def main():
     r_serv = redis.StrictRedis(
         host=cfg.get("Redis_Level_DB_Hashs", "host"),
         port=curYear,
-        db=cfg.getint("Redis_Level_DB_Hashs", "db"))
+        db=cfg.getint("Redis_Level_DB_Hashs", "db"),
+        decode_responses=True)
 
     # LOGGING #
     publisher.port = 6380
diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py
index d93f880a..9e28de72 100755
--- a/bin/SQLInjectionDetection.py
+++ b/bin/SQLInjectionDetection.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py
index e7534265..5f8a64a4 100755
--- a/bin/SentimentAnalysis.py
+++ b/bin/SentimentAnalysis.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
     Sentiment analyser module.
@@ -109,7 +109,7 @@ def Analyse(message, server):
         provider_timestamp = provider + '_' + str(timestamp)
         server.incr('UniqID')
         UniqID = server.get('UniqID')
-        print(provider_timestamp, '->', UniqID.decode('utf8'), 'dropped', num_line_removed, 'lines')
+        print(provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines')
         server.sadd(provider_timestamp, UniqID)
         server.set(UniqID, avg_score)
     else:
@@ -154,7 +154,8 @@ if __name__ == '__main__':
     server = redis.StrictRedis(
         host=p.config.get("Redis_Level_DB_Sentiment", "host"),
         port=p.config.get("Redis_Level_DB_Sentiment", "port"),
-        db=p.config.get("Redis_Level_DB_Sentiment", "db"))
+        db=p.config.get("Redis_Level_DB_Sentiment", "db"),
+        decode_responses=True)
 
     while True:
         message = p.get_from_set()
diff --git a/bin/SetForTermsFrequency.py b/bin/SetForTermsFrequency.py
index a9ee93ea..5f2abc2c 100755
--- a/bin/SetForTermsFrequency.py
+++ b/bin/SetForTermsFrequency.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 This Module is used for term frequency.
@@ -54,7 +54,8 @@ if __name__ == "__main__":
     server_term = redis.StrictRedis(
         host=p.config.get("Redis_Level_DB_TermFreq", "host"),
         port=p.config.get("Redis_Level_DB_TermFreq", "port"),
-        db=p.config.get("Redis_Level_DB_TermFreq", "db"))
+        db=p.config.get("Redis_Level_DB_TermFreq", "db"),
+        decode_responses=True)
 
     # FUNCTIONS #
     publisher.info("RegexForTermsFrequency script started")
diff --git a/bin/Shutdown.py b/bin/Shutdown.py
index 0a08e7af..609b257a 100755
--- a/bin/Shutdown.py
+++ b/bin/Shutdown.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 The ZMQ_Feed_Q Module
@@ -37,7 +37,8 @@ def main():
     # REDIS
     r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
                                port=cfg.getint("Redis_Queues", "port"),
-                               db=cfg.getint("Redis_Queues", "db"))
+                               db=cfg.getint("Redis_Queues", "db"),
+                               decode_responses=True)
 
     # FIXME: automatic based on the queue name.
    # ### SCRIPTS ####
diff --git a/bin/SourceCode.py b/bin/SourceCode.py
index ba166a67..f34bb34e 100644
--- a/bin/SourceCode.py
+++ b/bin/SourceCode.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 import time
 from packages import Paste
diff --git a/bin/Tokenize.py b/bin/Tokenize.py
index 6e374ee9..fdefeb6a 100755
--- a/bin/Tokenize.py
+++ b/bin/Tokenize.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 The Tokenize Module
diff --git a/bin/Update-conf.py b/bin/Update-conf.py
index 187509fe..901cb935 100755
--- a/bin/Update-conf.py
+++ b/bin/Update-conf.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 import configparser
diff --git a/bin/Web.py b/bin/Web.py
index 8369304a..45e5bfbe 100755
--- a/bin/Web.py
+++ b/bin/Web.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
@@ -47,7 +47,8 @@ if __name__ == "__main__":
     r_serv2 = redis.StrictRedis(
         host=p.config.get("Redis_Cache", "host"),
         port=p.config.getint("Redis_Cache", "port"),
-        db=p.config.getint("Redis_Cache", "db"))
+        db=p.config.getint("Redis_Cache", "db"),
+        decode_responses=True)
 
     # Protocol file path
     protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
diff --git a/bin/WebStats.py b/bin/WebStats.py
index 90a8f96b..4951ffe2 100755
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
@@ -116,7 +116,8 @@ if __name__ == '__main__':
     r_serv_trend = redis.StrictRedis(
         host=p.config.get("Redis_Level_DB_Trending", "host"),
         port=p.config.get("Redis_Level_DB_Trending", "port"),
-        db=p.config.get("Redis_Level_DB_Trending", "db"))
+        db=p.config.get("Redis_Level_DB_Trending", "db"),
+        decode_responses=True)
 
     # FILE CURVE SECTION #
     csv_path_proto = os.path.join(os.environ['AIL_HOME'],
diff --git a/bin/ailleakObject.py b/bin/ailleakObject.py
index 8b7ea185..bbf88711 100755
--- a/bin/ailleakObject.py
+++ b/bin/ailleakObject.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 from pymisp.tools.abstractgenerator import AbstractMISPObjectGenerator
@@ -15,7 +15,7 @@ class AilleakObject(AbstractMISPObjectGenerator):
         self._p_source = p_source.split('/')[-5:]
         self._p_source = '/'.join(self._p_source)[:-3] # -3 removes .gz
         self._p_date = p_date
-        self._p_content = p_content.encode('utf8')
+        self._p_content = p_content
         self._p_duplicate = p_duplicate
         self._p_duplicate_number = p_duplicate_number
         self.generate_attributes()
@@ -37,7 +37,7 @@ class ObjectWrapper:
         self.eventID_to_push = self.get_daily_event_id()
         cfg = configparser.ConfigParser()
         cfg.read('./packages/config.cfg')
-        self.maxDuplicateToPushToMISP = cfg.getint("ailleakObject", "maxDuplicateToPushToMISP")
+        self.maxDuplicateToPushToMISP = cfg.getint("ailleakObject", "maxDuplicateToPushToMISP")
 
     def add_new_object(self, moduleName, path):
         self.moduleName = moduleName
@@ -45,13 +45,10 @@ class ObjectWrapper:
         self.paste = Paste.Paste(path)
         self.p_date = self.date_to_str(self.paste.p_date)
         self.p_source = self.paste.p_path
-        self.p_content = self.paste.get_p_content().decode('utf8')
-
+        self.p_content = self.paste.get_p_content()
+
         temp = self.paste._get_p_duplicate()
-        try:
-            temp = temp.decode('utf8')
-        except AttributeError:
-            pass
+
+        #beautifier
         temp = json.loads(temp)
         self.p_duplicate_number = len(temp) if len(temp) >= 0 else 0
@@ -108,8 +105,8 @@ class ObjectWrapper:
         orgc_id = None
         sharing_group_id = None
         date = None
-        event = self.pymisp.new_event(distribution, threat,
-                                      analysis, info, date,
+        event = self.pymisp.new_event(distribution, threat,
+                                      analysis, info, date,
                                       published, orgc_id, org_id, sharing_group_id)
         return event
diff --git a/bin/alertHandler.py b/bin/alertHandler.py
index 69338cdc..41888be3 100755
--- a/bin/alertHandler.py
+++ b/bin/alertHandler.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
@@ -54,7 +54,8 @@ if __name__ == "__main__":
     server = redis.StrictRedis(
         host=p.config.get("Redis_Level_DB", "host"),
         port=curYear,
-        db=p.config.get("Redis_Level_DB", "db"))
+        db=p.config.get("Redis_Level_DB", "db"),
+        decode_responses=True)
 
     # FUNCTIONS #
     publisher.info("Script duplicate started")
@@ -62,7 +63,6 @@ if __name__ == "__main__":
     while True:
         message = p.get_from_set()
        if message is not None:
-            #decode because of pyhton3
             module_name, p_path = message.split(';')
             print("new alert : {}".format(module_name))
             #PST = Paste.Paste(p_path)
diff --git a/bin/empty_queue.py b/bin/empty_queue.py
index 77d22c1f..5b763a32 100755
--- a/bin/empty_queue.py
+++ b/bin/empty_queue.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 """
diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py
index 8f6130fe..50ffaeba 100755
--- a/bin/feeder/pystemon-feeder.py
+++ b/bin/feeder/pystemon-feeder.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #
 # This file is part of AIL framework - Analysis Information Leak framework
@@ -50,7 +50,7 @@ socket = context.socket(zmq.PUB)
 socket.bind(zmq_url)
 
 # check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
-r = redis.StrictRedis(host='localhost', db=10)
+r = redis.StrictRedis(host='localhost', db=10, decode_responses=True)
 
 # 101 pastes processed feed
 # 102 raw pastes feed
diff --git a/bin/feeder/test-zmq.py b/bin/feeder/test-zmq.py
index d9769fb5..f6f28aa1 100644
--- a/bin/feeder/test-zmq.py
+++ b/bin/feeder/test-zmq.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #
 # This file is part of AIL framework - Analysis Information Leak framework
@@ -19,14 +19,14 @@
 socket.connect ("tcp://crf.circl.lu:%s" % port)
 
 # 101 Name of the pastes only
 # 102 Full pastes in raw base64(gz)
-topicfilter = b"102"
-socket.setsockopt(zmq.SUBSCRIBE, topicfilter)
-print('b0')
+topicfilter = "102"
+socket.setsockopt_string(zmq.SUBSCRIBE, topicfilter)
+
 while True:
     message = socket.recv()
     print('b1')
     print (message)
-    if topicfilter == b"102":
+    if topicfilter == "102":
         topic, paste, messagedata = message.split()
         print(paste, messagedata)
     else:
diff --git a/bin/import_dir.py b/bin/import_dir.py
index a8faba7f..d8360631 100755
--- a/bin/import_dir.py
+++ b/bin/import_dir.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import zmq
diff --git a/bin/indexer_lookup.py b/bin/indexer_lookup.py
index 6642afce..cb01e3f2 100644
--- a/bin/indexer_lookup.py
+++ b/bin/indexer_lookup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #
 # This file is part of AIL framework - Analysis Information Leak framework
diff --git a/bin/launch_queues.py b/bin/launch_queues.py
index 55cfe717..9eac1a98 100755
--- a/bin/launch_queues.py
+++ b/bin/launch_queues.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 
diff --git a/bin/packages/Date.py b/bin/packages/Date.py
index 85da5b36..72b960b1 100644
--- a/bin/packages/Date.py
+++ b/bin/packages/Date.py
@@ -1,3 +1,5 @@
+#!/usr/bin/python3
+
 class Date(object):
     """docstring for Date"""
     def __init__(self, *args):
@@ -30,7 +32,7 @@ class Date(object):
 
     def _set_day(self, day):
         self.day = day
-
+
     def substract_day(self, numDay):
         import datetime
         computed_date = datetime.date(int(self.year), int(self.month), int(self.day)) - datetime.timedelta(numDay)
@@ -38,4 +40,3 @@ class Date(object):
         comp_month = str(computed_date.month).zfill(2)
         comp_day = str(computed_date.day).zfill(2)
         return comp_year + comp_month + comp_day
-
diff --git a/bin/packages/Hash.py b/bin/packages/Hash.py
index a55a8695..f0bb1d8e 100644
--- a/bin/packages/Hash.py
+++ b/bin/packages/Hash.py
@@ -1,3 +1,5 @@
+#!/usr/bin/python3
+
 import hashlib
 import crcmod
 import mmh3
diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py
index ce78e46b..9564cc19 100755
--- a/bin/packages/Paste.py
+++ b/bin/packages/Paste.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3.5
+#!/usr/bin/python3
 
 """
 The ``Paste Class``
@@ -69,11 +69,13 @@ class Paste(object):
         self.cache = redis.StrictRedis(
             host=cfg.get("Redis_Queues", "host"),
             port=cfg.getint("Redis_Queues", "port"),
-            db=cfg.getint("Redis_Queues", "db"))
+            db=cfg.getint("Redis_Queues", "db"),
+            decode_responses=True)
         self.store = redis.StrictRedis(
             host=cfg.get("Redis_Data_Merging", "host"),
             port=cfg.getint("Redis_Data_Merging", "port"),
-            db=cfg.getint("Redis_Data_Merging", "db"))
+            db=cfg.getint("Redis_Data_Merging", "db"),
+            decode_responses=True)
 
         self.p_path = p_path
         self.p_name = os.path.basename(self.p_path)
@@ -112,21 +114,17 @@ class Paste(object):
         paste = self.cache.get(self.p_path)
         if paste is None:
             try:
                 with gzip.open(self.p_path, 'rb') as f:
-                    paste = f.read()
+                    paste = f.read().decode('utf8')
                     self.cache.set(self.p_path, paste)
                     self.cache.expire(self.p_path, 300)
             except:
-                paste = b''
+                paste = ''
 
-        return paste.decode('utf8')
+        return paste
 
     def get_p_content_as_file(self):
-        try:
-            message = StringIO( (self.get_p_content()).decode('utf8') )
-        except AttributeError:
-            message = StringIO( (self.get_p_content()) )
-
+        message = StringIO(self.get_p_content())
         return message
 
     def get_p_content_with_removed_lines(self, threshold):
@@ -204,7 +202,7 @@ class Paste(object):
         """
 
         for hash_name, the_hash in self.p_hash_kind.items():
-            self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content().encode('utf8'))
+            self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content().encode())
         return self.p_hash
 
     def _get_p_language(self):
@@ -276,7 +274,6 @@ class Paste(object):
     def _get_p_duplicate(self):
         self.p_duplicate = self.store.hget(self.p_path, "p_duplicate")
         if self.p_duplicate is not None:
-            self.p_duplicate = self.p_duplicate.decode('utf8')
             return self.p_duplicate
         else:
             return '[]'
@@ -335,7 +332,7 @@ class Paste(object):
         json_duplicate = self.store.hget(path, attr_name)
         #json save on redis
         if json_duplicate is not None:
-            list_duplicate = json.loads(json_duplicate.decode('utf8'))
+            list_duplicate = json.loads(json_duplicate)
             # add new duplicate
             list_duplicate.append([hash_type, self.p_path, percent, date])
             self.store.hset(path, attr_name, json.dumps(list_duplicate))
diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py
index fe03e730..83511e40 100644
--- a/bin/packages/lib_refine.py
+++ b/bin/packages/lib_refine.py
@@ -1,3 +1,5 @@
+#!/usr/bin/python3
+
 import re
 import dns.resolver
 
diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py
index f58e85db..54581403 100644
--- a/bin/packages/lib_words.py
+++ b/bin/packages/lib_words.py
@@ -1,3 +1,5 @@
+#!/usr/bin/python3
+
 import os
 import string
@@ -110,7 +112,7 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
             # if the word have a value for the day
             # FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold
             value = r_serv.hget(word, curdate)
-            value = int(value.decode('utf8'))
+            value = int(value)
             if value >= threshold:
                 row.append(value)
         writer.writerow(row)
@@ -135,7 +137,6 @@ def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):
 
     redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2)
     words = list(server.smembers(redis_set_name))
-    words = [x.decode('utf-8') for x in words]
 
     headers = ['Date'] + words
     with open(csvfilename+'.csv', 'w') as f:
@@ -154,5 +155,5 @@
                 row.append(0)
             else:
                 # if the word have a value for the day
-                row.append(value.decode('utf8'))
+                row.append(value)
             writer.writerow(row)
diff --git a/bin/preProcessFeed.py b/bin/preProcessFeed.py
index 5c7a346e..37ee0512 100755
--- a/bin/preProcessFeed.py
+++ b/bin/preProcessFeed.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
diff --git a/bin/template.py b/bin/template.py
index 3f93e827..f311d439 100755
--- a/bin/template.py
+++ b/bin/template.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 """
 Template for new modules
diff --git a/bin/tor_fetcher.py b/bin/tor_fetcher.py
index 4f0056ad..67a2f4f8 100644
--- a/bin/tor_fetcher.py
+++ b/bin/tor_fetcher.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 import socks
diff --git a/var/www/Flask_base_template.py b/var/www/Flask_base_template.py
index 52a6e1dd..e9b65cda 100644
--- a/var/www/Flask_base_template.py
+++ b/var/www/Flask_base_template.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index e8228399..0be6854a 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 import redis
diff --git a/var/www/create_new_web_module.py b/var/www/create_new_web_module.py
index 7dfcf5b5..b2f26ff4 100755
--- a/var/www/create_new_web_module.py
+++ b/var/www/create_new_web_module.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 "Hepler to create a new webpage associated with a module."
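
[Editor's note on the recurring pattern above] Nearly every hunk in this patch pairs two changes: the redis.StrictRedis(...) clients gain decode_responses=True, and the .decode('utf8') / .encode('utf8') calls sprinkled around their call sites are deleted. Under Python 3, redis-py returns bytes by default; with decode_responses=True it returns str, so comparisons and set-membership tests against string literals behave the way these modules expect. A minimal, standalone sketch of the difference (it assumes only a local Redis on the default port; the key names are illustrative, not AIL constants):

    #!/usr/bin/env python3
    # Sketch: why decode_responses=True lets the .decode('utf8') calls go away.
    # Assumes a local Redis on localhost:6379; the keys below are hypothetical.
    import redis

    raw = redis.StrictRedis(host='localhost', port=6379, db=0)
    txt = redis.StrictRedis(host='localhost', port=6379, db=0,
                            decode_responses=True)

    raw.set('ail:example', 'credential;42;example.com;20180101')
    print(raw.get('ail:example'))  # b'credential;42;...'  (bytes)
    print(txt.get('ail:example'))  # 'credential;42;...'   (str)

    # Membership tests against str literals silently fail on bytes replies,
    # which is the bug class the removed encode()/decode() calls papered over:
    raw.sadd('ail:tracked', 'password')
    print('password' in raw.smembers('ail:tracked'))  # False -- set of bytes
    print('password' in txt.smembers('ail:tracked'))  # True

One caveat: a client built with decode_responses=True decodes every reply, so any module that later needs to store binary blobs should keep a separate, undecoded connection.
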
diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py
index 2801242e..26a72e76 100644
--- a/var/www/modules/Flask_config.py
+++ b/var/www/modules/Flask_config.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
@@ -26,37 +26,44 @@ cfg.read(configfile)
 r_serv = redis.StrictRedis(
     host=cfg.get("Redis_Queues", "host"),
     port=cfg.getint("Redis_Queues", "port"),
-    db=cfg.getint("Redis_Queues", "db"))
+    db=cfg.getint("Redis_Queues", "db"),
+    decode_responses=True)
 
 r_serv_log = redis.StrictRedis(
     host=cfg.get("Redis_Log", "host"),
     port=cfg.getint("Redis_Log", "port"),
-    db=cfg.getint("Redis_Log", "db"))
+    db=cfg.getint("Redis_Log", "db"),
+    decode_responses=True)
 
 r_serv_charts = redis.StrictRedis(
     host=cfg.get("Redis_Level_DB_Trending", "host"),
     port=cfg.getint("Redis_Level_DB_Trending", "port"),
-    db=cfg.getint("Redis_Level_DB_Trending", "db"))
+    db=cfg.getint("Redis_Level_DB_Trending", "db"),
+    decode_responses=True)
 
 r_serv_sentiment = redis.StrictRedis(
     host=cfg.get("Redis_Level_DB_Sentiment", "host"),
     port=cfg.getint("Redis_Level_DB_Sentiment", "port"),
-    db=cfg.getint("Redis_Level_DB_Sentiment", "db"))
+    db=cfg.getint("Redis_Level_DB_Sentiment", "db"),
+    decode_responses=True)
 
 r_serv_term = redis.StrictRedis(
     host=cfg.get("Redis_Level_DB_TermFreq", "host"),
     port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
-    db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
+    db=cfg.getint("Redis_Level_DB_TermFreq", "db"),
+    decode_responses=True)
 
 r_serv_cred = redis.StrictRedis(
     host=cfg.get("Redis_Level_DB_TermCred", "host"),
     port=cfg.getint("Redis_Level_DB_TermCred", "port"),
-    db=cfg.getint("Redis_Level_DB_TermCred", "db"))
+    db=cfg.getint("Redis_Level_DB_TermCred", "db"),
+    decode_responses=True)
 
 r_serv_pasteName = redis.StrictRedis(
     host=cfg.get("Redis_Paste_Name", "host"),
     port=cfg.getint("Redis_Paste_Name", "port"),
-    db=cfg.getint("Redis_Paste_Name", "db"))
+    db=cfg.getint("Redis_Paste_Name", "db"),
+    decode_responses=True)
 
 # VARIABLES #
 max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip
diff --git a/var/www/modules/browsepastes/Flask_browsepastes.py b/var/www/modules/browsepastes/Flask_browsepastes.py
index 84a3149c..7cac1a9d 100644
--- a/var/www/modules/browsepastes/Flask_browsepastes.py
+++ b/var/www/modules/browsepastes/Flask_browsepastes.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
@@ -37,7 +37,8 @@ for year in os.listdir(lvdbdir):
     r_serv_db[intYear] = redis.StrictRedis(
         host=cfg.get("Redis_Level_DB", "host"),
         port=intYear,
-        db=cfg.getint("Redis_Level_DB", "db"))
+        db=cfg.getint("Redis_Level_DB", "db"),
+        decode_responses=True)
 
 yearList.sort(reverse=True)
 browsepastes = Blueprint('browsepastes', __name__, template_folder='templates')
@@ -55,8 +56,8 @@ def getPastebyType(server, module_name):
 def event_stream_getImportantPasteByModule(module_name, year):
     index = 0
     all_pastes_list = getPastebyType(r_serv_db[year], module_name)
+
     for path in all_pastes_list:
-        path = path.decode('utf8')
         index += 1
         paste = Paste.Paste(path)
         content = paste.get_p_content()
@@ -94,7 +95,6 @@ def importantPasteByModule():
     allPastes = getPastebyType(r_serv_db[currentSelectYear], module_name)
 
     for path in allPastes[0:10]:
-        path = path.decode('utf8')
         all_path.append(path)
         paste = Paste.Paste(path)
         content = paste.get_p_content()
diff --git a/var/www/modules/dashboard/Flask_dashboard.py b/var/www/modules/dashboard/Flask_dashboard.py
index a1478d2f..563eb007 100644
--- a/var/www/modules/dashboard/Flask_dashboard.py
+++ b/var/www/modules/dashboard/Flask_dashboard.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
@@ -26,23 +26,11 @@ def event_stream():
     pubsub = r_serv_log.pubsub()
     pubsub.psubscribe("Script" + '.*')
     for msg in pubsub.listen():
-        # bytes conversion
-        try:
-            type = msg['type'].decode('utf8')
-        except:
-            type = msg['type']
-        try:
-            pattern = msg['pattern'].decode('utf8')
-        except:
-            pattern = msg['pattern']
-        try:
-            channel = msg['channel'].decode('utf8')
-        except:
-            channel = msg['channel']
-        try:
-            data = msg['data'].decode('utf8')
-        except:
-            data = msg['data']
+
+        type = msg['type']
+        pattern = msg['pattern']
+        channel = msg['channel']
+        data = msg['data']
 
         msg = {'channel': channel, 'type': type, 'pattern': pattern, 'data': data}
 
@@ -54,15 +42,13 @@ def get_queues(r):
     # We may want to put the llen in a pipeline to do only one query.
     newData = []
     for queue, card in r.hgetall("queues").items():
-        queue = queue.decode('utf8')
-        card = card.decode('utf8')
+
         key = "MODULE_" + queue + "_"
         keySet = "MODULE_TYPE_" + queue
 
         for moduleNum in r.smembers(keySet):
-            moduleNum = moduleNum.decode('utf8')
-            value = ( r.get(key + str(moduleNum)) ).decode('utf8')
+            value = r.get(key + str(moduleNum))
 
             if value is not None:
                 timestamp, path = value.split(", ")
diff --git a/var/www/modules/rawSkeleton/Flask_rawSkeleton.py b/var/www/modules/rawSkeleton/Flask_rawSkeleton.py
index c8183eec..d17e2b33 100644
--- a/var/www/modules/rawSkeleton/Flask_rawSkeleton.py
+++ b/var/www/modules/rawSkeleton/Flask_rawSkeleton.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
diff --git a/var/www/modules/search/Flask_search.py b/var/www/modules/search/Flask_search.py
index 7e45aad7..afce2452 100644
--- a/var/www/modules/search/Flask_search.py
+++ b/var/www/modules/search/Flask_search.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
diff --git a/var/www/modules/sentiment/Flask_sentiment.py b/var/www/modules/sentiment/Flask_sentiment.py
index 5102294a..fbf09f55 100644
--- a/var/www/modules/sentiment/Flask_sentiment.py
+++ b/var/www/modules/sentiment/Flask_sentiment.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
@@ -58,11 +58,7 @@ def sentiment_analysis_getplotdata():
     if allProvider == "True":
         range_providers = r_serv_charts.smembers('all_provider_set')
 
-        range_providers_str = []
-        for domain in range_providers:
-            m = domain.decode('utf8')
-            range_providers_str.append(m)
-        return jsonify(list(range_providers_str))
+        return jsonify(list(range_providers))
     else:
         range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8)
         # if empty, get yesterday top providers
@@ -74,13 +70,7 @@ def sentiment_analysis_getplotdata():
             print('today provider empty')
             range_providers = r_serv_charts.smembers('all_provider_set')
 
-        # decode bytes
-        range_providers_str = []
-        for domain in range_providers:
-            m = domain.decode('utf8')
-            range_providers_str.append(m)
-
-        return jsonify(list(range_providers_str))
+        return jsonify(list(range_providers))
 
     elif provider is not None:
         to_return = {}
@@ -92,7 +82,7 @@ def sentiment_analysis_getplotdata():
 
             list_value = []
             for cur_id in r_serv_sentiment.smembers(cur_set_name):
-                cur_value = (r_serv_sentiment.get(cur_id)).decode('utf8')
+                cur_value = r_serv_sentiment.get(cur_id)
                 list_value.append(cur_value)
             list_date[cur_timestamp] = list_value
         to_return[provider] = list_date
 
@@ -115,7 +105,7 @@ def sentiment_analysis_plot_tool_getdata():
     if getProviders == 'True':
         providers = []
         for cur_provider in r_serv_charts.smembers('all_provider_set'):
-            providers.append(cur_provider.decode('utf8'))
+            providers.append(cur_provider)
         return jsonify(providers)
 
     else:
@@ -144,7 +134,7 @@ def sentiment_analysis_plot_tool_getdata():
 
             list_value = []
             for cur_id in r_serv_sentiment.smembers(cur_set_name):
-                cur_value = (r_serv_sentiment.get(cur_id)).decode('utf8')
+                cur_value = r_serv_sentiment.get(cur_id)
                 list_value.append(cur_value)
             list_date[cur_timestamp] = list_value
         to_return[cur_provider] = list_date
diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py
index a2abf2bf..20a9afdc 100644
--- a/var/www/modules/showpaste/Flask_showpaste.py
+++ b/var/www/modules/showpaste/Flask_showpaste.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
diff --git a/var/www/modules/terms/Flask_terms.py b/var/www/modules/terms/Flask_terms.py
index db230ee6..d550c2c0 100644
--- a/var/www/modules/terms/Flask_terms.py
+++ b/var/www/modules/terms/Flask_terms.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
@@ -72,7 +72,7 @@ def Term_getValueOverRange(word, startDate, num_day, per_paste=""):
     curr_to_return = 0
     for timestamp in range(startDate, startDate - max(num_day)*oneDay, -oneDay):
         value = r_serv_term.hget(per_paste+str(timestamp), word)
-        curr_to_return += int(value.decode('utf8')) if value is not None else 0
+        curr_to_return += int(value) if value is not None else 0
         for i in num_day:
             if passed_days == i-1:
                 to_return.append(curr_to_return)
@@ -157,10 +157,8 @@ def terms_management():
     trackReg_list_values = []
     trackReg_list_num_of_paste = []
     for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name):
-        tracked_regex = tracked_regex.decode('utf8')
-        print(tracked_regex)
-
-        notificationEMailTermMapping[tracked_regex] = "\n".join( (r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex)).decode('utf8') )
+        notificationEMailTermMapping[tracked_regex] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex))
+
         if tracked_regex not in notificationEnabledDict:
             notificationEnabledDict[tracked_regex] = False
@@ -176,7 +174,7 @@
             value_range.append(term_date)
         trackReg_list_values.append(value_range)
 
-        if tracked_regex.encode('utf8') in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
+        if tracked_regex in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
             notificationEnabledDict[tracked_regex] = True
 
     #Set
@@ -184,9 +182,9 @@
     trackSet_list = []
     trackSet_list_values = []
     trackSet_list_num_of_paste = []
     for tracked_set in r_serv_term.smembers(TrackedSetSet_Name):
-        tracked_set = tracked_set.decode('utf8')
+
 
-        notificationEMailTermMapping[tracked_set] = "\n".join( (r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set)).decode('utf8') )
+        notificationEMailTermMapping[tracked_set] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set))
 
         if tracked_set not in notificationEnabledDict:
@@ -203,7 +201,7 @@
             value_range.append(term_date)
         trackSet_list_values.append(value_range)
 
-        if tracked_set.encode('utf8') in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
+        if tracked_set in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
             notificationEnabledDict[tracked_set] = True
 
     #Tracked terms
@@ -211,13 +209,7 @@
     track_list_values = []
     track_list_num_of_paste = []
     for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name):
-        tracked_term = tracked_term.decode('utf8')
-        print('tracked_term : .')
-        print(tracked_term)
-        #print(TrackedTermsNotificationEmailsPrefix_Name)
-        print(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term))
-        #print(tracked_term)
         notificationEMailTermMapping[tracked_term] = "\n".join( r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term))
 
         if tracked_term not in notificationEnabledDict:
@@ -229,16 +221,14 @@
         term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term)
 
         set_paste_name = "tracked_" + tracked_term
-        print('set_paste_name : .')
-        print(set_paste_name)
+
         track_list_num_of_paste.append( r_serv_term.scard(set_paste_name) )
-        print('track_list_num_of_paste : .')
-        print(track_list_num_of_paste)
+
         term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
         value_range.append(term_date)
         track_list_values.append(value_range)
 
-        if tracked_term.encode('utf8') in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
+        if tracked_term in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name):
             notificationEnabledDict[tracked_term] = True
 
     #blacklist terms
@@ -246,7 +236,7 @@
     for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name):
         term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term)
         term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
-        black_list.append([blacked_term.decode('utf8'), term_date])
+        black_list.append([blacked_term, term_date])
 
     return render_template("terms_management.html", black_list=black_list, track_list=track_list, trackReg_list=trackReg_list, trackSet_list=trackSet_list,
@@ -259,8 +249,6 @@ def terms_management():
 @terms.route("/terms_management_query_paste/")
 def terms_management_query_paste():
     term = request.args.get('term')
-    print('term :')
-    print(term)
     paste_info = []
 
     # check if regex or not
@@ -275,7 +263,6 @@
         track_list_path = r_serv_term.smembers(set_paste_name)
 
     for path in track_list_path:
-        path = path.decode('utf8')
         paste = Paste.Paste(path)
         p_date = str(paste._get_p_date())
         p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
@@ -402,7 +389,6 @@ def terms_management_action():
             r_serv_term.hdel(TrackedRegexDate_Name, term)
         elif term.startswith('\\') and term.endswith('\\'):
             r_serv_term.srem(TrackedSetSet_Name, term)
-            #print(term)
             r_serv_term.hdel(TrackedSetDate_Name, term)
         else:
             r_serv_term.srem(TrackedTermsSet_Name, term.lower())
@@ -515,7 +501,7 @@ def terms_plot_top_data():
             curr_value_range = int(value) if value is not None else 0
             value_range.append([timestamp, curr_value_range])
 
-        to_return.append([term.decode('utf8'), value_range, tot_value, position])
+        to_return.append([term, value_range, tot_value, position])
 
     return jsonify(to_return)
 
@@ -532,7 +518,6 @@ def credentials_management_query_paste():
     paste_info = []
     for pathNum in allPath:
         path = r_serv_cred.hget(REDIS_KEY_ALL_PATH_SET_REV, pathNum)
-        path = path.decode('utf8')
         paste = Paste.Paste(path)
         p_date = str(paste._get_p_date())
         p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
@@ -574,7 +559,6 @@ def cred_management_action():
 
     iter_num = 0
     tot_iter = len(AllUsernameInRedis)*len(possibilities)
     for tempUsername in AllUsernameInRedis:
-        tempUsername = tempUsername.decode('utf8')
         for poss in possibilities:
             #FIXME print progress
             if(iter_num % int(tot_iter/20) == 0):
@@ -583,7 +567,7 @@
                 iter_num += 1
 
             if poss in tempUsername:
-                num = (r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET, tempUsername)).decode('utf8')
+                num = r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET, tempUsername)
                 if num is not None:
                     uniq_num_set.add(num)
                 for num in r_serv_cred.smembers(tempUsername):
@@ -592,7 +576,7 @@
     data = {'usr': [], 'path': [], 'numPaste': [], 'simil': []}
     for Unum in uniq_num_set:
         levenRatio = 2.0
-        username = (r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET_REV, Unum)).decode('utf8')
+        username = r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET_REV, Unum)
 
         # Calculate Levenshtein distance, ignore negative ratio
         supp_splitted = supplied.split()
@@ -604,20 +588,11 @@
 
         data['usr'].append(username)
 
-        try:
-            Unum = Unum.decode('utf8')
-        except:
-            pass
 
         allPathNum = list(r_serv_cred.smembers(REDIS_KEY_MAP_CRED_TO_PATH+'_'+Unum))
 
-        # decode bytes
-        allPathNum_str = []
-        for p in allPathNum:
-            allPathNum_str.append(p.decode('utf8'))
-
-        data['path'].append(allPathNum_str)
-        data['numPaste'].append(len(allPathNum_str))
+        data['path'].append(allPathNum)
+        data['numPaste'].append(len(allPathNum))
         data['simil'].append(levenRatioStr)
 
     to_return = {}
diff --git a/var/www/modules/trendingcharts/Flask_trendingcharts.py b/var/www/modules/trendingcharts/Flask_trendingcharts.py
index cfd4b596..d0c2e8f1 100644
--- a/var/www/modules/trendingcharts/Flask_trendingcharts.py
+++ b/var/www/modules/trendingcharts/Flask_trendingcharts.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
@@ -47,9 +47,8 @@ def progressionCharts():
         date_range = get_date_range(num_day)
         # Retreive all data from the last num_day
         for date in date_range:
+
             curr_value = r_serv_charts.hget(attribute_name, date)
-            if curr_value is not None:
-                curr_value = curr_value.decode('utf8')
             bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)])
         bar_values.insert(0, attribute_name)
         return jsonify(bar_values)
@@ -58,13 +57,7 @@ def progressionCharts():
         redis_progression_name = "z_top_progression_" + trending_name
         keyw_value = r_serv_charts.zrevrangebyscore(redis_progression_name, '+inf', '-inf', withscores=True, start=0, num=10)
 
-        # decode bytes
-        keyw_value_str = []
-        for domain, value in keyw_value:
-            m = domain.decode('utf8'), value
-            keyw_value_str.append(m)
-
-        return jsonify(keyw_value_str)
+        return jsonify(keyw_value)
 
 @trendings.route("/wordstrending/")
 def wordstrending():
diff --git a/var/www/modules/trendingmodules/Flask_trendingmodules.py b/var/www/modules/trendingmodules/Flask_trendingmodules.py
index b210742f..eca5e606 100644
--- a/var/www/modules/trendingmodules/Flask_trendingmodules.py
+++ b/var/www/modules/trendingmodules/Flask_trendingmodules.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.5
+#!/usr/bin/env python3
 # -*-coding:UTF-8 -*
 
 '''
@@ -28,18 +28,12 @@ def get_top_relevant_data(server, module_name):
     for date in get_date_range(15):
         redis_progression_name_set = 'top_'+ module_name +'_set_' + date
         member_set = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True)
-        member_set_str = []
-        # decode bytes
-        for domain, value in member_set:
-            m = domain.decode('utf8'), value
-            member_set_str.append(m)
-
-        if len(member_set_str) == 0: #No data for this date
+        if len(member_set) == 0: #No data for this date
             days += 1
         else:
-            member_set_str.insert(0, ("passed_days", days))
-            return member_set_str
+            member_set.insert(0, ("passed_days", days))
+            return member_set
 
 
 def get_date_range(num_day):
@@ -94,13 +88,9 @@ def providersChart():
     for date in date_range:
         curr_value_size = ( r_serv_charts.hget(keyword_name+'_'+'size', date) )
-        if curr_value_size is not None:
-            curr_value_size = curr_value_size.decode('utf8')
 
         curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date)
         curr_value_size_avg = r_serv_charts.hget(keyword_name+'_'+'avg', date)
-        if curr_value_size_avg is not None:
-            curr_value_size_avg = curr_value_size_avg.decode('utf8')
 
         if module_name == "size":
@@ -119,16 +109,10 @@ def providersChart():
         member_set = r_serv_charts.zrevrangebyscore(redis_provider_name_set, '+inf', '-inf', withscores=True, start=0, num=8)
 
-        # decode bytes
-        member_set_str = []
-        for domain, value in member_set:
-            m = domain.decode('utf8'), value
-            member_set_str.append(m)
-
         # Member set is a list of (value, score) pairs
-        if len(member_set_str) == 0:
-            member_set_str.append(("No relevant data", float(100)))
-        return jsonify(member_set_str)
+        if len(member_set) == 0:
+            member_set.append(("No relevant data", float(100)))
+        return jsonify(member_set)
 
 @trendingmodules.route("/moduletrending/")
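
[Editor's note on the trending hunks above] With decode_responses=True, zrevrangebyscore(..., withscores=True) already returns a list of (str, float) tuples, so the member_set_str re-decoding loops are dead code; note that the remaining member_set_str references must then also be switched to member_set, otherwise the "No relevant data" fallback appends to a list that no longer exists. A standalone sketch of the reply shape these modules now rely on (the key name is hypothetical, and zadd uses the old redis-py 2.x score-first argument order that this codebase uses elsewhere, e.g. in ModuleStats.py):

    #!/usr/bin/env python3
    # Sketch of the zrevrangebyscore reply under decode_responses=True.
    # 'top_credential_set_20180101' is an illustrative key, not an AIL constant.
    import redis

    r = redis.StrictRedis(host='localhost', port=6379, db=0,
                          decode_responses=True)
    r.zadd('top_credential_set_20180101', 3.0, 'example.com')  # score, member
    r.zadd('top_credential_set_20180101', 1.0, 'other.org')

    member_set = r.zrevrangebyscore('top_credential_set_20180101',
                                    '+inf', '-inf', withscores=True)
    print(member_set)  # [('example.com', 3.0), ('other.org', 1.0)] -- (str, float)

    # The list is usable directly, as get_top_relevant_data() now does:
    if len(member_set) == 0:
        member_set.append(("No relevant data", float(100)))
    member_set.insert(0, ("passed_days", 0))
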