mirror of https://github.com/CIRCL/AIL-framework
fix: [redis cache] remove old Redis_Data_Merging db
parent
89e95ca030
commit
3482a85410
|
@ -1,50 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
"""
|
|
||||||
The ZMQ_Sub_Attribute Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
This module is saving Attribute of the paste into redis
|
|
||||||
|
|
||||||
"""
|
|
||||||
import time
|
|
||||||
from packages import Paste
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
from Helper import Process
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
config_section = 'Attributes'
|
|
||||||
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("Attribute is Running")
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
message = p.get_from_set()
|
|
||||||
|
|
||||||
if message is not None:
|
|
||||||
PST = Paste.Paste(message)
|
|
||||||
else:
|
|
||||||
publisher.debug("Script Attribute is idling 1s")
|
|
||||||
print('sleeping')
|
|
||||||
time.sleep(1)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# FIXME do it directly in the class
|
|
||||||
PST.save_attribute_redis("p_encoding", PST._get_p_encoding())
|
|
||||||
#PST.save_attribute_redis("p_language", PST._get_p_language())
|
|
||||||
# FIXME why not all saving everything there.
|
|
||||||
PST.save_all_attributes_redis()
|
|
||||||
# FIXME Not used.
|
|
||||||
PST.store.sadd("Pastes_Objects", PST.p_rel_path)
|
|
||||||
except IOError:
|
|
||||||
print("CRC Checksum Failed on :", PST.p_rel_path)
|
|
||||||
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
|
|
||||||
PST.p_source, PST.p_date, PST.p_name))
|
|
|
@ -31,14 +31,10 @@ if __name__ == "__main__":
|
||||||
p = Process(config_section)
|
p = Process(config_section)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Creditcard script subscribed to channel creditcard_categ")
|
publisher.info("CreditCards script started")
|
||||||
|
|
||||||
|
|
||||||
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
|
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
|
||||||
|
|
||||||
# FIXME For retro compatibility
|
|
||||||
channel = 'creditcard_categ'
|
|
||||||
|
|
||||||
# Source: http://www.richardsramblings.com/regex/credit-card-numbers/
|
# Source: http://www.richardsramblings.com/regex/credit-card-numbers/
|
||||||
cards = [
|
cards = [
|
||||||
r'\b4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b', # 16-digit VISA, with separators
|
r'\b4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b', # 16-digit VISA, with separators
|
||||||
|
@ -69,9 +65,6 @@ if __name__ == "__main__":
|
||||||
print(clean_card, 'is valid')
|
print(clean_card, 'is valid')
|
||||||
creditcard_set.add(clean_card)
|
creditcard_set.add(clean_card)
|
||||||
|
|
||||||
paste.__setattr__(channel, creditcard_set)
|
|
||||||
paste.save_attribute_redis(channel, creditcard_set)
|
|
||||||
|
|
||||||
pprint.pprint(creditcard_set)
|
pprint.pprint(creditcard_set)
|
||||||
to_print = 'CreditCard;{};{};{};'.format(
|
to_print = 'CreditCard;{};{};{};'.format(
|
||||||
paste.p_source, paste.p_date, paste.p_name)
|
paste.p_source, paste.p_date, paste.p_name)
|
||||||
|
|
|
@ -193,9 +193,6 @@ if __name__ == "__main__":
|
||||||
r_onion.sadd('i2p_crawler_queue', msg)
|
r_onion.sadd('i2p_crawler_queue', msg)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
# Saving the list of extracted onion domains.
|
|
||||||
PST.__setattr__(channel, domains_list)
|
|
||||||
PST.save_attribute_redis(channel, domains_list)
|
|
||||||
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
|
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
|
||||||
PST.p_name)
|
PST.p_name)
|
||||||
|
|
||||||
|
|
10
bin/Web.py
10
bin/Web.py
|
@ -58,10 +58,7 @@ if __name__ == "__main__":
|
||||||
cc_critical = p.config.get("Url", "cc_critical")
|
cc_critical = p.config.get("Url", "cc_critical")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script URL subscribed to channel web_categ")
|
publisher.info("Script URL Started")
|
||||||
|
|
||||||
# FIXME For retro compatibility
|
|
||||||
channel = 'web_categ'
|
|
||||||
|
|
||||||
message = p.get_from_set()
|
message = p.get_from_set()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
@ -150,11 +147,6 @@ if __name__ == "__main__":
|
||||||
domains_list)
|
domains_list)
|
||||||
|
|
||||||
if A_values[0] >= 1:
|
if A_values[0] >= 1:
|
||||||
PST.__setattr__(channel, A_values)
|
|
||||||
PST.save_attribute_redis(channel, (A_values[0],
|
|
||||||
list(A_values[1])))
|
|
||||||
|
|
||||||
|
|
||||||
pprint.pprint(A_values)
|
pprint.pprint(A_values)
|
||||||
publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
|
publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
|
||||||
PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))
|
PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))
|
||||||
|
|
|
@ -62,7 +62,6 @@ class Paste(object):
|
||||||
|
|
||||||
config_loader = ConfigLoader.ConfigLoader()
|
config_loader = ConfigLoader.ConfigLoader()
|
||||||
self.cache = config_loader.get_redis_conn("Redis_Queues")
|
self.cache = config_loader.get_redis_conn("Redis_Queues")
|
||||||
self.store = config_loader.get_redis_conn("Redis_Data_Merging")
|
|
||||||
self.store_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
self.store_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
||||||
|
|
||||||
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
|
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
|
||||||
|
@ -327,50 +326,27 @@ class Paste(object):
|
||||||
def get_p_date_path(self):
|
def get_p_date_path(self):
|
||||||
return self.p_date_path
|
return self.p_date_path
|
||||||
|
|
||||||
def save_all_attributes_redis(self, key=None):
|
# def save_all_attributes_redis(self, key=None):
|
||||||
"""
|
# """
|
||||||
Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)
|
# Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)
|
||||||
|
#
|
||||||
:param r_serv: -- Connexion to the Database.
|
# :param r_serv: -- Connexion to the Database.
|
||||||
:param key: -- Key of an additionnal set.
|
# :param key: -- Key of an additionnal set.
|
||||||
|
#
|
||||||
Example:
|
# Example:
|
||||||
import redis
|
# import redis
|
||||||
|
#
|
||||||
r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0)
|
# r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0)
|
||||||
|
#
|
||||||
PST = Paste("/home/Zkopkmlk.gz")
|
# PST = Paste("/home/Zkopkmlk.gz")
|
||||||
PST.save_all_attributes_redis(r_serv)
|
# PST.save_all_attributes_redis(r_serv)
|
||||||
|
#
|
||||||
"""
|
# """
|
||||||
# LevelDB Compatibility
|
#
|
||||||
p = self.store.pipeline(False)
|
# def save_attribute_redis(self, attr_name, value):
|
||||||
p.hset(self.p_path, "p_name", self.p_name)
|
# """
|
||||||
p.hset(self.p_path, "p_size", self.p_size)
|
# Save an attribute as a field
|
||||||
p.hset(self.p_path, "p_mime", self.p_mime)
|
# """
|
||||||
# p.hset(self.p_path, "p_encoding", self.p_encoding)
|
|
||||||
p.hset(self.p_path, "p_date", self._get_p_date())
|
|
||||||
p.hset(self.p_path, "p_hash_kind", self._get_p_hash_kind())
|
|
||||||
p.hset(self.p_path, "p_hash", self.p_hash)
|
|
||||||
# p.hset(self.p_path, "p_langage", self.p_langage)
|
|
||||||
# p.hset(self.p_path, "p_nb_lines", self.p_nb_lines)
|
|
||||||
# p.hset(self.p_path, "p_max_length_line", self.p_max_length_line)
|
|
||||||
# p.hset(self.p_path, "p_categories", self.p_categories)
|
|
||||||
p.hset(self.p_path, "p_source", self.p_source)
|
|
||||||
if key is not None:
|
|
||||||
p.sadd(key, self.p_path)
|
|
||||||
else:
|
|
||||||
pass
|
|
||||||
p.execute()
|
|
||||||
|
|
||||||
def save_attribute_redis(self, attr_name, value):
|
|
||||||
"""
|
|
||||||
Save an attribute as a field
|
|
||||||
"""
|
|
||||||
if type(value) == set:
|
|
||||||
self.store.hset(self.p_path, attr_name, json.dumps(list(value)))
|
|
||||||
else:
|
|
||||||
self.store.hset(self.p_path, attr_name, json.dumps(value))
|
|
||||||
|
|
||||||
def save_attribute_duplicate(self, value):
|
def save_attribute_duplicate(self, value):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -20,9 +20,6 @@ subscribe = Redis_Duplicate
|
||||||
[Indexer]
|
[Indexer]
|
||||||
subscribe = Redis_Global
|
subscribe = Redis_Global
|
||||||
|
|
||||||
[Attributes]
|
|
||||||
subscribe = Redis_Global
|
|
||||||
|
|
||||||
[DomClassifier]
|
[DomClassifier]
|
||||||
subscribe = Redis_Global
|
subscribe = Redis_Global
|
||||||
|
|
||||||
|
@ -67,15 +64,17 @@ publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
|
||||||
|
|
||||||
[Onion]
|
[Onion]
|
||||||
subscribe = Redis_Onion
|
subscribe = Redis_Onion
|
||||||
publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler
|
publish = Redis_ValidOnion,Redis_Tags,Redis_Crawler
|
||||||
#publish = Redis_Global,Redis_ValidOnion,ZMQ_FetchedOnion
|
#publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler
|
||||||
|
|
||||||
|
# TODO remove me
|
||||||
[DumpValidOnion]
|
[DumpValidOnion]
|
||||||
subscribe = Redis_ValidOnion
|
subscribe = Redis_ValidOnion
|
||||||
|
|
||||||
[Web]
|
[Web]
|
||||||
subscribe = Redis_Web
|
subscribe = Redis_Web
|
||||||
publish = Redis_Url,ZMQ_Url
|
publish = Redis_Url
|
||||||
|
#publish = Redis_Url,ZMQ_Url
|
||||||
|
|
||||||
[WebStats]
|
[WebStats]
|
||||||
subscribe = Redis_Url
|
subscribe = Redis_Url
|
||||||
|
|
|
@ -142,11 +142,6 @@ host = localhost
|
||||||
port = 6381
|
port = 6381
|
||||||
db = 0
|
db = 0
|
||||||
|
|
||||||
[Redis_Data_Merging]
|
|
||||||
host = localhost
|
|
||||||
port = 6379
|
|
||||||
db = 1
|
|
||||||
|
|
||||||
[Redis_Mixer_Cache]
|
[Redis_Mixer_Cache]
|
||||||
host = localhost
|
host = localhost
|
||||||
port = 6381
|
port = 6381
|
||||||
|
|
Loading…
Reference in New Issue