diff --git a/bin/Attributes.py b/bin/Attributes.py deleted file mode 100755 index 74357065..00000000 --- a/bin/Attributes.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -""" -The ZMQ_Sub_Attribute Module -============================ - -This module is saving Attribute of the paste into redis - -""" -import time -from packages import Paste -from pubsublogger import publisher - -from Helper import Process - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Attributes' - - p = Process(config_section) - - # FUNCTIONS # - publisher.info("Attribute is Running") - - while True: - try: - message = p.get_from_set() - - if message is not None: - PST = Paste.Paste(message) - else: - publisher.debug("Script Attribute is idling 1s") - print('sleeping') - time.sleep(1) - continue - - # FIXME do it directly in the class - PST.save_attribute_redis("p_encoding", PST._get_p_encoding()) - #PST.save_attribute_redis("p_language", PST._get_p_language()) - # FIXME why not all saving everything there. - PST.save_all_attributes_redis() - # FIXME Not used. - PST.store.sadd("Pastes_Objects", PST.p_rel_path) - except IOError: - print("CRC Checksum Failed on :", PST.p_rel_path) - publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( - PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/CreditCards.py b/bin/CreditCards.py index 0c6bdf3f..456e474a 100755 --- a/bin/CreditCards.py +++ b/bin/CreditCards.py @@ -31,14 +31,10 @@ if __name__ == "__main__": p = Process(config_section) # FUNCTIONS # - publisher.info("Creditcard script subscribed to channel creditcard_categ") - + publisher.info("CreditCards script started") creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?" - # FIXME For retro compatibility - channel = 'creditcard_categ' - # Source: http://www.richardsramblings.com/regex/credit-card-numbers/ cards = [ r'\b4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b', # 16-digit VISA, with separators @@ -69,9 +65,6 @@ if __name__ == "__main__": print(clean_card, 'is valid') creditcard_set.add(clean_card) - paste.__setattr__(channel, creditcard_set) - paste.save_attribute_redis(channel, creditcard_set) - pprint.pprint(creditcard_set) to_print = 'CreditCard;{};{};{};'.format( paste.p_source, paste.p_date, paste.p_name) diff --git a/bin/Onion.py b/bin/Onion.py index ad1fe5bb..2b6be55e 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -193,9 +193,6 @@ if __name__ == "__main__": r_onion.sadd('i2p_crawler_queue', msg) ''' - # Saving the list of extracted onion domains. - PST.__setattr__(channel, domains_list) - PST.save_attribute_redis(channel, domains_list) to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name) diff --git a/bin/Web.py b/bin/Web.py index ca4366e8..68e37c25 100755 --- a/bin/Web.py +++ b/bin/Web.py @@ -58,10 +58,7 @@ if __name__ == "__main__": cc_critical = p.config.get("Url", "cc_critical") # FUNCTIONS # - publisher.info("Script URL subscribed to channel web_categ") - - # FIXME For retro compatibility - channel = 'web_categ' + publisher.info("Script URL Started") message = p.get_from_set() prec_filename = None @@ -150,11 +147,6 @@ if __name__ == "__main__": domains_list) if A_values[0] >= 1: - PST.__setattr__(channel, A_values) - PST.save_attribute_redis(channel, (A_values[0], - list(A_values[1]))) - - pprint.pprint(A_values) publisher.info('Url;{};{};{};Checked {} URL;{}'.format( PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path)) diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index f6695eba..65c3ca46 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -62,7 +62,6 @@ class Paste(object): config_loader = ConfigLoader.ConfigLoader() self.cache = config_loader.get_redis_conn("Redis_Queues") - self.store = config_loader.get_redis_conn("Redis_Data_Merging") self.store_metadata = config_loader.get_redis_conn("ARDB_Metadata") self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) @@ -327,50 +326,27 @@ class Paste(object): def get_p_date_path(self): return self.p_date_path - def save_all_attributes_redis(self, key=None): - """ - Saving all the attributes in a "Redis-like" Database (Redis, LevelDB) - - :param r_serv: -- Connexion to the Database. - :param key: -- Key of an additionnal set. - - Example: - import redis - - r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0) - - PST = Paste("/home/Zkopkmlk.gz") - PST.save_all_attributes_redis(r_serv) - - """ - # LevelDB Compatibility - p = self.store.pipeline(False) - p.hset(self.p_path, "p_name", self.p_name) - p.hset(self.p_path, "p_size", self.p_size) - p.hset(self.p_path, "p_mime", self.p_mime) - # p.hset(self.p_path, "p_encoding", self.p_encoding) - p.hset(self.p_path, "p_date", self._get_p_date()) - p.hset(self.p_path, "p_hash_kind", self._get_p_hash_kind()) - p.hset(self.p_path, "p_hash", self.p_hash) - # p.hset(self.p_path, "p_langage", self.p_langage) - # p.hset(self.p_path, "p_nb_lines", self.p_nb_lines) - # p.hset(self.p_path, "p_max_length_line", self.p_max_length_line) - # p.hset(self.p_path, "p_categories", self.p_categories) - p.hset(self.p_path, "p_source", self.p_source) - if key is not None: - p.sadd(key, self.p_path) - else: - pass - p.execute() - - def save_attribute_redis(self, attr_name, value): - """ - Save an attribute as a field - """ - if type(value) == set: - self.store.hset(self.p_path, attr_name, json.dumps(list(value))) - else: - self.store.hset(self.p_path, attr_name, json.dumps(value)) + # def save_all_attributes_redis(self, key=None): + # """ + # Saving all the attributes in a "Redis-like" Database (Redis, LevelDB) + # + # :param r_serv: -- Connexion to the Database. + # :param key: -- Key of an additionnal set. + # + # Example: + # import redis + # + # r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0) + # + # PST = Paste("/home/Zkopkmlk.gz") + # PST.save_all_attributes_redis(r_serv) + # + # """ + # + # def save_attribute_redis(self, attr_name, value): + # """ + # Save an attribute as a field + # """ def save_attribute_duplicate(self, value): """ diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index cf65a126..ed3c466e 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -20,9 +20,6 @@ subscribe = Redis_Duplicate [Indexer] subscribe = Redis_Global -[Attributes] -subscribe = Redis_Global - [DomClassifier] subscribe = Redis_Global @@ -67,15 +64,17 @@ publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags [Onion] subscribe = Redis_Onion -publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler -#publish = Redis_Global,Redis_ValidOnion,ZMQ_FetchedOnion +publish = Redis_ValidOnion,Redis_Tags,Redis_Crawler +#publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler +# TODO remove me [DumpValidOnion] subscribe = Redis_ValidOnion [Web] subscribe = Redis_Web -publish = Redis_Url,ZMQ_Url +publish = Redis_Url +#publish = Redis_Url,ZMQ_Url [WebStats] subscribe = Redis_Url diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample index c6988bbb..669550b9 100644 --- a/configs/core.cfg.sample +++ b/configs/core.cfg.sample @@ -142,11 +142,6 @@ host = localhost port = 6381 db = 0 -[Redis_Data_Merging] -host = localhost -port = 6379 -db = 1 - [Redis_Mixer_Cache] host = localhost port = 6381