fix: [redis cache] remove old Redis_Data_Merging db

pull/549/head
Terrtia 2021-02-03 14:58:27 +01:00
parent 89e95ca030
commit 3482a85410
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
7 changed files with 28 additions and 126 deletions

View File

@ -1,50 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The ZMQ_Sub_Attribute Module
============================
This module saves the attributes of each paste into Redis.
"""
import time
from packages import Paste
from pubsublogger import publisher
from Helper import Process
if __name__ == "__main__":
    # Script entry point: pull paste identifiers from the module queue and
    # store each paste's attributes through the Paste object's redis helpers.
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Attributes'

    p = Process(config_section)

    # FUNCTIONS #
    publisher.info("Attribute is Running")

    while True:
        # Reset per iteration: the IOError handler below must never see a
        # paste (or message) left over from a previous loop turn, nor an
        # unbound name when the very first fetch fails.
        message = None
        PST = None
        try:
            message = p.get_from_set()

            if message is None:
                # Nothing queued: idle briefly before polling again.
                publisher.debug("Script Attribute is idling 1s")
                print('sleeping')
                time.sleep(1)
                continue

            PST = Paste.Paste(message)

            # FIXME do it directly in the class
            PST.save_attribute_redis("p_encoding", PST._get_p_encoding())
            #PST.save_attribute_redis("p_language", PST._get_p_language())
            # FIXME why not all saving everything there.
            PST.save_all_attributes_redis()
            # FIXME Not used.
            PST.store.sadd("Pastes_Objects", PST.p_rel_path)
        except IOError:
            if PST is None:
                # The error was raised before a Paste object existed (while
                # fetching the message or constructing the Paste), so there
                # are no paste fields to report; log what is known instead.
                print("CRC Checksum Failed on :", message)
                publisher.error(
                    'IOError before the paste could be loaded: {}'.format(message))
                continue
            # NOTE(review): the handler treats any IOError as a CRC failure,
            # presumably from reading the compressed paste -- confirm.
            print("CRC Checksum Failed on :", PST.p_rel_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))

View File

@ -31,14 +31,10 @@ if __name__ == "__main__":
p = Process(config_section) p = Process(config_section)
# FUNCTIONS # # FUNCTIONS #
publisher.info("Creditcard script subscribed to channel creditcard_categ") publisher.info("CreditCards script started")
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?" creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
# FIXME For retro compatibility
channel = 'creditcard_categ'
# Source: http://www.richardsramblings.com/regex/credit-card-numbers/ # Source: http://www.richardsramblings.com/regex/credit-card-numbers/
cards = [ cards = [
r'\b4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b', # 16-digit VISA, with separators r'\b4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b', # 16-digit VISA, with separators
@ -69,9 +65,6 @@ if __name__ == "__main__":
print(clean_card, 'is valid') print(clean_card, 'is valid')
creditcard_set.add(clean_card) creditcard_set.add(clean_card)
paste.__setattr__(channel, creditcard_set)
paste.save_attribute_redis(channel, creditcard_set)
pprint.pprint(creditcard_set) pprint.pprint(creditcard_set)
to_print = 'CreditCard;{};{};{};'.format( to_print = 'CreditCard;{};{};{};'.format(
paste.p_source, paste.p_date, paste.p_name) paste.p_source, paste.p_date, paste.p_name)

View File

@ -193,9 +193,6 @@ if __name__ == "__main__":
r_onion.sadd('i2p_crawler_queue', msg) r_onion.sadd('i2p_crawler_queue', msg)
''' '''
# Saving the list of extracted onion domains.
PST.__setattr__(channel, domains_list)
PST.save_attribute_redis(channel, domains_list)
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
PST.p_name) PST.p_name)

View File

@ -58,10 +58,7 @@ if __name__ == "__main__":
cc_critical = p.config.get("Url", "cc_critical") cc_critical = p.config.get("Url", "cc_critical")
# FUNCTIONS # # FUNCTIONS #
publisher.info("Script URL subscribed to channel web_categ") publisher.info("Script URL Started")
# FIXME For retro compatibility
channel = 'web_categ'
message = p.get_from_set() message = p.get_from_set()
prec_filename = None prec_filename = None
@ -150,11 +147,6 @@ if __name__ == "__main__":
domains_list) domains_list)
if A_values[0] >= 1: if A_values[0] >= 1:
PST.__setattr__(channel, A_values)
PST.save_attribute_redis(channel, (A_values[0],
list(A_values[1])))
pprint.pprint(A_values) pprint.pprint(A_values)
publisher.info('Url;{};{};{};Checked {} URL;{}'.format( publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path)) PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))

View File

@ -62,7 +62,6 @@ class Paste(object):
config_loader = ConfigLoader.ConfigLoader() config_loader = ConfigLoader.ConfigLoader()
self.cache = config_loader.get_redis_conn("Redis_Queues") self.cache = config_loader.get_redis_conn("Redis_Queues")
self.store = config_loader.get_redis_conn("Redis_Data_Merging")
self.store_metadata = config_loader.get_redis_conn("ARDB_Metadata") self.store_metadata = config_loader.get_redis_conn("ARDB_Metadata")
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
@ -327,50 +326,27 @@ class Paste(object):
def get_p_date_path(self): def get_p_date_path(self):
return self.p_date_path return self.p_date_path
def save_all_attributes_redis(self, key=None): # def save_all_attributes_redis(self, key=None):
""" # """
Saving all the attributes in a "Redis-like" Database (Redis, LevelDB) # Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)
#
:param r_serv: -- Connexion to the Database. # :param r_serv: -- Connexion to the Database.
:param key: -- Key of an additionnal set. # :param key: -- Key of an additionnal set.
#
Example: # Example:
import redis # import redis
#
r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0) # r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0)
#
PST = Paste("/home/Zkopkmlk.gz") # PST = Paste("/home/Zkopkmlk.gz")
PST.save_all_attributes_redis(r_serv) # PST.save_all_attributes_redis(r_serv)
#
""" # """
# LevelDB Compatibility #
p = self.store.pipeline(False) # def save_attribute_redis(self, attr_name, value):
p.hset(self.p_path, "p_name", self.p_name) # """
p.hset(self.p_path, "p_size", self.p_size) # Save an attribute as a field
p.hset(self.p_path, "p_mime", self.p_mime) # """
# p.hset(self.p_path, "p_encoding", self.p_encoding)
p.hset(self.p_path, "p_date", self._get_p_date())
p.hset(self.p_path, "p_hash_kind", self._get_p_hash_kind())
p.hset(self.p_path, "p_hash", self.p_hash)
# p.hset(self.p_path, "p_langage", self.p_langage)
# p.hset(self.p_path, "p_nb_lines", self.p_nb_lines)
# p.hset(self.p_path, "p_max_length_line", self.p_max_length_line)
# p.hset(self.p_path, "p_categories", self.p_categories)
p.hset(self.p_path, "p_source", self.p_source)
if key is not None:
p.sadd(key, self.p_path)
else:
pass
p.execute()
def save_attribute_redis(self, attr_name, value):
"""
Save an attribute as a field
"""
if type(value) == set:
self.store.hset(self.p_path, attr_name, json.dumps(list(value)))
else:
self.store.hset(self.p_path, attr_name, json.dumps(value))
def save_attribute_duplicate(self, value): def save_attribute_duplicate(self, value):
""" """

View File

@ -20,9 +20,6 @@ subscribe = Redis_Duplicate
[Indexer] [Indexer]
subscribe = Redis_Global subscribe = Redis_Global
[Attributes]
subscribe = Redis_Global
[DomClassifier] [DomClassifier]
subscribe = Redis_Global subscribe = Redis_Global
@ -67,15 +64,17 @@ publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
[Onion] [Onion]
subscribe = Redis_Onion subscribe = Redis_Onion
publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler publish = Redis_ValidOnion,Redis_Tags,Redis_Crawler
#publish = Redis_Global,Redis_ValidOnion,ZMQ_FetchedOnion #publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler
# TODO remove me
[DumpValidOnion] [DumpValidOnion]
subscribe = Redis_ValidOnion subscribe = Redis_ValidOnion
[Web] [Web]
subscribe = Redis_Web subscribe = Redis_Web
publish = Redis_Url,ZMQ_Url publish = Redis_Url
#publish = Redis_Url,ZMQ_Url
[WebStats] [WebStats]
subscribe = Redis_Url subscribe = Redis_Url

View File

@ -142,11 +142,6 @@ host = localhost
port = 6381 port = 6381
db = 0 db = 0
[Redis_Data_Merging]
host = localhost
port = 6379
db = 1
[Redis_Mixer_Cache] [Redis_Mixer_Cache]
host = localhost host = localhost
port = 6381 port = 6381