From a17ab905118156a9f1093f05adeb220d375c2a94 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 14 Jun 2021 17:36:30 +0200 Subject: [PATCH] chg: [Tracker term] track terms by sources --- bin/lib/Tracker.py | 59 ++++++++++++++++++++++++++++++------ bin/lib/item_basic.py | 21 ++++++++----- bin/packages/Item.py | 11 +++---- bin/packages/Term.py | 3 ++ bin/trackers/Tracker_Term.py | 57 ++++++++++++++++++---------------- 5 files changed, 102 insertions(+), 49 deletions(-) diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index 844e1093..1e722dd8 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -74,7 +74,7 @@ def get_tracker_level(tracker_uuid): def get_tracker_user_id(tracker_uuid): return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id') -def get_tracker_uuid_list(tracker, tracker_type): +def get_tracker_uuid_list(tracker, tracker_type): ######################################################### USE ME return list(r_serv_tracker.smembers('all:tracker_uuid:{}:{}'.format(tracker_type, tracker))) def get_tracker_tags(tracker_uuid): @@ -83,6 +83,9 @@ def get_tracker_tags(tracker_uuid): def get_tracker_mails(tracker_uuid): return list(r_serv_tracker.smembers('tracker:mail:{}'.format(tracker_uuid))) +def get_tracker_uuid_sources(tracker_uuid): + return list(r_serv_tracker.smembers(f'tracker:sources:{tracker_uuid}')) + def get_tracker_description(tracker_uuid): return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'description') @@ -133,10 +136,11 @@ def get_tracker_sparkline(tracker_uuid, num_day=6): def add_tracked_item(tracker_uuid, item_id, item_date): # track item - r_serv_tracker.sadd('tracker:item:{}:{}'.format(tracker_uuid, item_date), item_id) + res = r_serv_tracker.sadd(f'tracker:item:{tracker_uuid}:{item_date}', item_id) # track nb item by date - r_serv_tracker.zadd('tracker:stat:{}'.format(tracker_uuid), item_date, int(item_date)) - + if res == 1: + r_serv_tracker.zadd('tracker:stat:{}'.format(tracker_uuid), item_date, int(item_date)) +bin/lib/Tracker.py def get_email_subject(tracker_uuid): tracker_description = get_tracker_description(tracker_uuid) if not tracker_description: @@ -150,6 +154,10 @@ def get_tracker_last_updated_by_type(tracker_type): epoch_update = 0 return float(epoch_update) +# # TODO: check type API +def trigger_trackers_refresh(tracker_type): + r_serv_tracker.set(f'tracker:refresh:{tracker_type}', time.time()) + ###################### #### TRACKERS ACL #### @@ -235,7 +243,7 @@ def api_validate_tracker_to_add(tracker , tracker_type, nb_words=1): return ({"status": "error", "reason": "Incorrect type"}, 400) return ({"status": "success", "tracker": tracker, "type": tracker_type}, 200) -def create_tracker(tracker, tracker_type, user_id, level, tags, mails, description, dashboard=0, tracker_uuid=None): +def create_tracker(tracker, tracker_type, user_id, level, tags, mails, description, dashboard=0, tracker_uuid=None, sources=[]): # edit tracker if tracker_uuid: edit_tracker = True @@ -255,7 +263,7 @@ def create_tracker(tracker, tracker_type, user_id, level, tags, mails, descripti # YARA if tracker_type == 'yara_custom' or tracker_type == 'yara_default': - # delete yara rule + # create yara rule if tracker_type == 'yara_default' and old_type == 'yara': if not is_default_yara_rule(old_tracker): filepath = get_yara_rule_file_by_tracker_name(old_tracker) @@ -318,6 +326,11 @@ def create_tracker(tracker, tracker_type, user_id, level, tags, mails, descripti for mail in mails: r_serv_tracker.sadd('tracker:mail:{}'.format(tracker_uuid), escape(mail) ) + # create tracker sources filter + for source in sources: + # escape source ? + r_serv_tracker.sadd(f'tracker:sources:{tracker_uuid}', escape(source) ) + # toggle refresh module tracker list/set r_serv_tracker.set('tracker:refresh:{}'.format(tracker_type), time.time()) if tracker_type != old_type: # toggle old type refresh @@ -346,6 +359,7 @@ def api_add_tracker(dict_input, user_id): res = verify_mail_list(mails) if res: return res + sources = dict_input.get('sources', []) ## TODO: add dashboard key level = dict_input.get('level', 1) @@ -371,7 +385,7 @@ def api_add_tracker(dict_input, user_id): if is_tracker_in_user_level(tracker, tracker_type, user_id) and not tracker_uuid: return ({"status": "error", "reason": "Tracker already exist"}, 409) - tracker_uuid = create_tracker(tracker , tracker_type, user_id, level, tags, mails, description, tracker_uuid=tracker_uuid) + tracker_uuid = create_tracker(tracker , tracker_type, user_id, level, tags, mails, description, tracker_uuid=tracker_uuid, sources=sources) return ({'tracker': tracker, 'type': tracker_type, 'uuid': tracker_uuid}, 200) @@ -407,10 +421,12 @@ def get_all_default_yara_rules_by_type(yara_types): else: return [] -def get_all_tracked_yara_files(): +def get_all_tracked_yara_files(filter_disabled=False): yara_files = r_serv_tracker.smembers('all:tracker:yara') if not yara_files: yara_files = [] + if filter_disabled: + pass return yara_files def reload_yara_rules(): @@ -424,6 +440,22 @@ def reload_yara_rules(): rules = yara.compile(filepaths=rule_dict) return rules +# # TODO: +# Avoid useless CHECK +# Empty list == ALL SOURCES +# FIXME MOOVE ME +def get_tracker_sources(tracker, tracker_type): + l_sources = set() + for tracker_uuid in get_tracker_uuid_list(tracker, tracker_type): + sources = get_tracker_uuid_sources(tracker_uuid) + if sources: + for source in get_tracker_uuid_sources(tracker_uuid): + l_sources.add(source) + else: + l_sources = [] + break + return l_sources + def is_valid_yara_rule(yara_rule): try: yara.compile(source=yara_rule) @@ -518,5 +550,14 @@ def api_get_default_rule_content(default_yara_rule): ##-- YARA --## if __name__ == '__main__': - res = is_valid_yara_rule('rule dummy { }') + #res = is_valid_yara_rule('rule dummy { }') + + # res = create_tracker('test', 'word', 'admin@admin.test', 1, [], [], None, sources=['crawled', 'pastebin.com', 'rt/pastebin.com']) + res = create_tracker('test', 'word', 'admin@admin.test', 1, [], [], None) + # print(res) + + t_uuid = '1c2d35b0-9330-4feb-b454-da13007aa9f7' + res = get_tracker_sources('test', 'word') + + print(res) diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py index 1b7e0de5..e9145117 100755 --- a/bin/lib/item_basic.py +++ b/bin/lib/item_basic.py @@ -64,7 +64,8 @@ def get_item_content(item_id): item_content = f.read().decode() r_cache.set(item_full_path, item_content) r_cache.expire(item_full_path, 300) - except: + except Exception as e: + print(e) item_content = '' return str(item_content) @@ -176,7 +177,7 @@ def add_map_obj_id_item_id(obj_id, item_id, obj_type): ##-- --## ## COMMON ## -def _get_dir_source_name(directory, source_name=None, l_sources_name=set()): +def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filter_dir=False): if source_name: l_dir = os.listdir(os.path.join(directory, source_name)) else: @@ -188,12 +189,16 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set()): else: for src_name in l_dir: if len(src_name) == 4: - try: - int(src_name) - l_sources_name.add(os.path.join(source_name)) - return l_sources_name - except: - pass + #try: + int(src_name) + to_add = os.path.join(source_name) + # filter sources, remove first directory + if filter_dir: + to_add = to_add.replace('archive/', '').replace('alerts/', '') + l_sources_name.add(to_add) + return l_sources_name + #except: + # pass if source_name: src_name = os.path.join(source_name, src_name) l_sources_name = _get_dir_source_name(directory, source_name=src_name, l_sources_name=l_sources_name) diff --git a/bin/packages/Item.py b/bin/packages/Item.py index 5ecc85f2..6ca7ba43 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -570,7 +570,9 @@ class Item(AbstractObject): """ Returns Item source/feeder name """ - return item_basic.get_source(self.id) + #return self.id.split('/')[-5] + l_source = self.id.split('/')[:-4] + return os.path.join(*l_source) def get_basename(self): return os.path.basename(self.id) @@ -605,11 +607,8 @@ class Item(AbstractObject): except FileNotFoundError: return False -# if __name__ == '__main__': -# -# item = Item('') -# res = item.get_date(separator=True) -# print(res) +#if __name__ == '__main__': + # import Domain # domain = Domain.Domain('domain.onion') diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 45b8d639..04d9a472 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -319,6 +319,9 @@ def delete_term(term_uuid): # remove mails r_serv_term.delete('tracker:mail:{}'.format(term_uuid)) + # remove sources + r_serv_term.delete('tracker:sources:{}'.format(term_uuid)) + # remove item set all_item_date = r_serv_term.zrange('tracker:stat:{}'.format(term_uuid), 0, -1) for date in all_item_date: diff --git a/bin/trackers/Tracker_Term.py b/bin/trackers/Tracker_Term.py index 9878835e..fa9553ac 100755 --- a/bin/trackers/Tracker_Term.py +++ b/bin/trackers/Tracker_Term.py @@ -90,46 +90,51 @@ class Tracker_Term(AbstractModule): # create token statistics #for word in dict_words_freq: # Term.create_token_statistics(item_date, word, dict_words_freq[word]) + item_source = item.get_source() # check solo words + ####### # TODO: check if source needed ####### for word in self.list_tracked_words: if word in dict_words_freq: - self.new_term_found(word, 'word', item.get_id(), item_date) + self.new_term_found(word, 'word', item.get_id(), item_date, item_source) - # check words set - for elem in self.set_tracked_words_list: - list_words = elem[0] - nb_words_threshold = elem[1] - word_set = elem[2] - nb_uniq_word = 0 + # check words set + for elem in self.set_tracked_words_list: + list_words = elem[0] + nb_words_threshold = elem[1] + word_set = elem[2] + nb_uniq_word = 0 - for word in list_words: - if word in dict_words_freq: - nb_uniq_word += 1 - if nb_uniq_word >= nb_words_threshold: - self.new_term_found(word_set, 'set', item.get_id(), item_date) + for word in list_words: + if word in dict_words_freq: + nb_uniq_word += 1 + if nb_uniq_word >= nb_words_threshold: + self.new_term_found(word_set, 'set', item.get_id(), item_date, item_source) - def new_term_found(self, term, term_type, item_id, item_date): + def new_term_found(self, term, term_type, item_id, item_date, item_source): uuid_list = Term.get_term_uuid_list(term, term_type) self.redis_logger.info(f'new tracked term found: {term} in {item_id}') print(f'new tracked term found: {term} in {item_id}') for term_uuid in uuid_list: - Term.add_tracked_item(term_uuid, item_id, item_date) + tracker_sources = Tracker.get_tracker_uuid_sources(term_uuid) + if not tracker_sources or item_source in tracker_sources: + print(not tracker_sources or item_source in tracker_sources) + Tracker.add_tracked_item(term_uuid, item_id, item_date) - tags_to_add = Term.get_term_tags(term_uuid) - for tag in tags_to_add: - msg = '{};{}'.format(tag, item_id) - self.send_message_to_queue(msg, 'Tags') + tags_to_add = Term.get_term_tags(term_uuid) + for tag in tags_to_add: + msg = '{};{}'.format(tag, item_id) + self.send_message_to_queue(msg, 'Tags') - mail_to_notify = Term.get_term_mails(term_uuid) - if mail_to_notify: - mail_subject = Tracker.get_email_subject(term_uuid) - mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id) - for mail in mail_to_notify: - self.redis_logger.debug(f'Send Mail {mail_subject}') - print(f'Send Mail {mail_subject}') - NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body) + mail_to_notify = Term.get_term_mails(term_uuid) + if mail_to_notify: + mail_subject = Tracker.get_email_subject(term_uuid) + mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id) + for mail in mail_to_notify: + self.redis_logger.debug(f'Send Mail {mail_subject}') + print(f'S print(item_content)end Mail {mail_subject}') + NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body) if __name__ == '__main__':