mirror of https://github.com/CIRCL/AIL-framework
chg: [categ] messages, bypass categ module + fix correlation
parent
5b808ed416
commit
73185f19fd
|
@ -59,7 +59,7 @@ CORRELATION_TYPES_BY_OBJ = {
|
||||||
"pgp": ["domain", "item", "message"],
|
"pgp": ["domain", "item", "message"],
|
||||||
"screenshot": ["domain", "item"],
|
"screenshot": ["domain", "item"],
|
||||||
"title": ["domain", "item"],
|
"title": ["domain", "item"],
|
||||||
"user-account": ["chat", "chat-subchannel", "chat-thread", "message"],
|
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message"],
|
||||||
"username": ["domain", "item", "message"], # TODO chat-user/account
|
"username": ["domain", "item", "message"], # TODO chat-user/account
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,14 +6,14 @@ The ZMQ_PubSub_Categ Module
|
||||||
|
|
||||||
Each word file created under /files/ represents a category.
|
Each word file created under /files/ represents a category.
|
||||||
This module takes these files and compares them to
|
This module takes these files and compares them to
|
||||||
the content of an item.
|
the content of an obj.
|
||||||
|
|
||||||
When a word from an item matches one or more of these word files, the filename of
|
When a word from an obj matches one or more of these word files, the filename of
|
||||||
the item / zhe item id is published/forwarded to the next modules.
|
the obj / the obj id is published/forwarded to the next modules.
|
||||||
|
|
||||||
Each category (each file) represents a dynamic channel.
|
Each category (each file) represents a dynamic channel.
|
||||||
This means that if you create 1000 files under /files/ you'll have 1000 channels
|
This means that if you create 1000 files under /files/ you'll have 1000 channels
|
||||||
where every time there is a matching word to a category, the item containing
|
where every time there is a matching word to a category, the obj containing
|
||||||
this word will be pushed to this specific channel.
|
this word will be pushed to this specific channel.
|
||||||
|
|
||||||
.. note:: The channel will have the name of the file created.
|
.. note:: The channel will have the name of the file created.
|
||||||
|
@ -44,7 +44,6 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from modules.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib.objects.Items import Item
|
|
||||||
|
|
||||||
|
|
||||||
class Categ(AbstractModule):
|
class Categ(AbstractModule):
|
||||||
|
@ -81,27 +80,32 @@ class Categ(AbstractModule):
|
||||||
self.categ_words = tmp_dict.items()
|
self.categ_words = tmp_dict.items()
|
||||||
|
|
||||||
def compute(self, message, r_result=False):
|
def compute(self, message, r_result=False):
|
||||||
# Create Item Object
|
# Get obj Object
|
||||||
item = self.get_obj()
|
obj = self.get_obj()
|
||||||
# Get item content
|
# Get obj content
|
||||||
content = item.get_content()
|
content = obj.get_content()
|
||||||
categ_found = []
|
categ_found = []
|
||||||
|
|
||||||
# Search for pattern categories in item content
|
# Search for pattern categories in obj content
|
||||||
for categ, pattern in self.categ_words:
|
for categ, pattern in self.categ_words:
|
||||||
|
|
||||||
found = set(re.findall(pattern, content))
|
if obj.type == 'message':
|
||||||
lenfound = len(found)
|
self.add_message_to_queue(message='0', queue=categ)
|
||||||
if lenfound >= self.matchingThreshold:
|
else:
|
||||||
categ_found.append(categ)
|
|
||||||
msg = str(lenfound)
|
|
||||||
|
|
||||||
# Export message to categ queue
|
found = set(re.findall(pattern, content))
|
||||||
print(msg, categ)
|
lenfound = len(found)
|
||||||
self.add_message_to_queue(message=msg, queue=categ)
|
if lenfound >= self.matchingThreshold:
|
||||||
|
categ_found.append(categ)
|
||||||
|
msg = str(lenfound)
|
||||||
|
|
||||||
|
# Export message to categ queue
|
||||||
|
print(msg, categ)
|
||||||
|
self.add_message_to_queue(message=msg, queue=categ)
|
||||||
|
|
||||||
|
self.redis_logger.debug(
|
||||||
|
f'Categ;{obj.get_source()};{obj.get_date()};{obj.get_basename()};Detected {lenfound} as {categ};{obj.get_id()}')
|
||||||
|
|
||||||
self.redis_logger.debug(
|
|
||||||
f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}')
|
|
||||||
if r_result:
|
if r_result:
|
||||||
return categ_found
|
return categ_found
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue