From b9463e513cbabb58bd35b28f4301eb8891cb86b6 Mon Sep 17 00:00:00 2001 From: edhoedt Date: Mon, 29 Apr 2019 19:23:14 +0200 Subject: [PATCH] Yara export --- app/Lib/Export/YaraExport.php | 141 +++++ app/Model/Attribute.php | 2 + app/Model/Event.php | 18 + app/files/scripts/yara/misp2yara.py | 549 ++++++++++++++++++++ app/files/scripts/yara/permissive_plyara.py | 157 ++++++ app/files/scripts/yara/yaraexport.py | 94 ++++ app/files/scripts/yara/yaratemplate.py | 352 +++++++++++++ app/tmp/yara/empty | 0 requirements.txt | 1 + 9 files changed, 1314 insertions(+) create mode 100644 app/Lib/Export/YaraExport.php create mode 100644 app/files/scripts/yara/misp2yara.py create mode 100644 app/files/scripts/yara/permissive_plyara.py create mode 100644 app/files/scripts/yara/yaraexport.py create mode 100644 app/files/scripts/yara/yaratemplate.py create mode 100644 app/tmp/yara/empty diff --git a/app/Lib/Export/YaraExport.php b/app/Lib/Export/YaraExport.php new file mode 100644 index 000000000..ef4faadab --- /dev/null +++ b/app/Lib/Export/YaraExport.php @@ -0,0 +1,141 @@ +>' . APP . 
'tmp/logs/yara_export.log'; + private $__n_attributes = 0; + private $__MAX_n_attributes = 15000; + private $__yara_file_gen = null; + private $__yara_file_asis = null; + private $__curr_input_file = null; + private $__scope = false; + private $__curr_input_is_empty = true; + private $__JsonExporter = false; + private $__raw_mode = true; + + public $non_restrictive_export = true; + + private static function __count_atributes($data) + { + $attributes_count = count($data['Attribute']); + // foreach ($data['Object'] as $_object) { + // $attributes_count += count($_object['Attribute']); + // } + } + + public function header($options = array()) + { + if($this->__JsonExporter === false){ + $this->__JsonExporter = new JsonExport(); + } + $this->__initialize_yara_file(); + $this->__initialize_misp_file($options); + if($options['returnFormat'] === 'yara-json'){ + $this->__raw_mode = false; + } + return ''; + } + + private function __initialize_yara_file() + { + $yaraFileName = $this->generateRandomFileName(); + $this->__yara_file_gen = new File($this->__tmp_dir . $yaraFileName . '_generated', true, 0644); + $this->__yara_file_asis = new File($this->__tmp_dir . $yaraFileName . '_asis', true, 0644); + $this->__yara_file_gen->close(); + $this->__yara_file_asis->close(); + } + + private function __initialize_misp_file($options) + { + $mispFileName = $this->generateRandomFileName(); + $this->__curr_input_file = new File($this->__tmp_dir . 
$mispFileName, true, 0644); + $header = $this->__JsonExporter->header($options); + $this->__curr_input_file->append($header); + $this->__curr_input_is_empty = true; + } + + public function handler($data, $options = array()) + { + // convert attribute(s) to json and write them to input queue file + if ($options['scope'] === 'Attribute') { + $attr_count = 1; + } else if($options['scope'] === 'Event') { + $attr_count = YaraExport::__count_atributes($data); + } + if(!empty($data)){ + if(!$this->__curr_input_is_empty){ + $this->separator(); // calling separator since returning '' will prevent it + } + $jsonData = $this->__JsonExporter->handler($data, $options); + $this->__curr_input_file->append($jsonData); + $this->__curr_input_is_empty = false; + } + $this->__n_attributes += $attr_count; + // if the file exceeds the max_attributes, process it, delete it and reset the counter + if ($this->__n_attributes >= $this->__MAX_n_attributes){ + $this->__process_file($options); + $this->__initialize_misp_file($options); + } + return ''; + } + + public function footer($options = array()) + { + if(!($this->__curr_input_is_empty)){ + $this->__process_file($options); + } + $file = new File($this->__yara_file_gen->path); + $data_gen = $file->read(true, 'r'); + $file->close(); + $file->delete(); + $file = new File($this->__yara_file_asis->path); + $data_asis = $file->read(true, 'r'); + $file->close(); + $file->delete(); + if($this->__raw_mode){ + $output = + '// ===================================== GENERATED ===================================='. PHP_EOL . + $data_gen . PHP_EOL . + '// ===================================== AS-IS ===================================='. PHP_EOL . + $data_asis; + }else{ + $output = '{"generated":['. $data_gen .'],'. + '"as-is":[' . $data_asis . 
']}'; + } + return $output; + } + + public function separator() + { + if(!$this->__curr_input_is_empty){ + $this->__curr_input_file->append(','); + } + return ''; + } + + private function __process_file($options) + { + $footer = $this->__JsonExporter->footer($options); + $this->__curr_input_file->append($footer); + $pythonSrcipt = $this->__script_path; + $in = $this->__curr_input_file->path; + $out1 = $this->__yara_file_gen->path; + $out2 = $this->__yara_file_asis->path; + $logging = $this->__end_of_cmd; + $raw_flag = $this->__raw_mode ? '--raw' : ''; + $result = shell_exec("python3 $pythonSrcipt --input $in --out-generated $out1 --out-asis $out2 $raw_flag $logging"); + $this->__curr_input_file->close(); + $this->__curr_input_file->delete(); + $this->__n_attributes = 0; + } + + public function generateRandomFileName() + { + return (new RandomTool())->random_str(false, 12); + } +} diff --git a/app/Model/Attribute.php b/app/Model/Attribute.php index 2ad1e856c..e4d18d4e4 100644 --- a/app/Model/Attribute.php +++ b/app/Model/Attribute.php @@ -388,6 +388,8 @@ class Attribute extends AppModel 'suricata' => array('txt', 'NidsSuricataExport', 'rules'), 'snort' => array('txt', 'NidsSnortExport', 'rules'), 'text' => array('txt', 'TextExport', 'txt'), + 'yara' => array('txt', 'YaraExport', 'yara'), + 'yara-json' => array('json', 'YaraExport', 'json'), 'rpz' => array('rpz', 'RPZExport', 'rpz'), 'csv' => array('csv', 'CsvExport', 'csv'), 'cache' => array('txt', 'CacheExport', 'cache') diff --git a/app/Model/Event.php b/app/Model/Event.php index 0918d4edf..69d831096 100755 --- a/app/Model/Event.php +++ b/app/Model/Event.php @@ -147,6 +147,22 @@ class Event extends AppModel 'params' => array('returnFormat' => 'text', 'includeAttachments' => 1), 'description' => 'Click on one of the buttons below to download all the attributes with the matching type. This list can be used to feed forensic software when searching for susipicious files. 
Only published events and attributes marked as IDS Signature are exported.' ), + 'yara' => array( + 'extension' => '.yara', + 'type' => 'Yara', + 'scope' => 'Event', + 'requiresPublished' => 1, + 'params' => array('returnFormat' => 'yara'), + 'description' => 'Click this to download Yara rules generated from all relevant attributes.' + ), + 'yara-json' => array( + 'extension' => '.json', + 'type' => 'Yara', + 'scope' => 'Event', + 'requiresPublished' => 1, + 'params' => array('returnFormat' => 'yara-json'), + 'description' => 'Click this to download Yara rules generated from all relevant attributes. Rules are returned in a JSON format with information about origin (generated or parsed) and validity.' + ), ); public $validFormats = array( @@ -160,6 +176,8 @@ class Event extends AppModel 'csv' => array('csv', 'CsvExport', 'csv'), 'stix' => array('xml', 'Stix1Export', 'xml'), 'stix2' => array('json', 'Stix2Export', 'json'), + 'yara' => array('txt', 'YaraExport', 'yara'), + 'yara-json' => array('json', 'YaraExport', 'json'), 'cache' => array('txt', 'CacheExport', 'cache') ); diff --git a/app/files/scripts/yara/misp2yara.py b/app/files/scripts/yara/misp2yara.py new file mode 100644 index 000000000..6cec97314 --- /dev/null +++ b/app/files/scripts/yara/misp2yara.py @@ -0,0 +1,549 @@ +from yaratemplate import YaraLexerException, YaraRuleTemplate, YaraFileTemplate, YaraTemplateException, YaraTemplateRuleConflictException +import uuid + + +# =========================== CORE EXPORTERS =================================== +def mispevent2yara(event, options={}): + default_opts = { + 'chaining_op': 'or', + 'display_attr_uuids': False, + 'max_attrs_per_rule': 1000, + 'event_uuid_only': True + } + default_opts.update(options) + opts = default_opts + if not event['Attribute']: + return [] + generated, asis_valid, asis_broken = mispattrs2yara(event['Attribute'], opts) + for rule_index, r in enumerate(generated + asis_valid): + if not r.loaded_from_source and r.attr_count() > 1: + 
rulename = 'MISP_EVENT_{}_PART{}'.format(event['uuid'].replace('-', '_'), rule_index+1) + r.set_name(rulename) + r.add_meta('MISP_EVENT_UUID', event['uuid']) + r.add_meta('MISP_EVENT_INFO', event['info']) + return generated, asis_valid, asis_broken + + +def mispobject2yara(obj): + pass + + +def mispattrs2yara(attrs_array, options={}): + if not attrs_array: + return [] + opts = { + 'chaining_op': 'or', + 'max_attrs_per_rule': 1 + } + opts.update(options) + generated_rules = [] + asis_valid_rules = [] + asis_broken_rules = [] + current_rule = MISPRuleTemplate() + for i, attr in enumerate(attrs_array): + if attr['type'] == 'yara': + try: + yara_rules = MISPRuleTemplate.from_yara_attr(attr) + asis_valid_rules += yara_rules + except YaraTemplateException as e: + comment = '/* MISP EXPORT COMMENT\n' + comment += ' MISP_UUID: {}\n'.format(attr['uuid']) + comment += ' {}\n'.format(str(e)) + comment += '*/\n' + commented_attr = '{}{}'.format(comment, attr['value']) + asis_broken_rules.append(commented_attr) + else: + current_rule.add_attribute(attr, opts) + last_attr_reached = i == len(attrs_array)-1 + max_size_reached = current_rule.attr_count() >= opts['max_attrs_per_rule'] + if last_attr_reached or max_size_reached: + # if rule has "strings" section, generate the corresponding "condition" + if current_rule.strings: + if opts['chaining_op'] == 'or': + current_rule.or_condition('any of them') + elif opts['chaining_op'] == 'and': + current_rule.and_condition('all of them') + # if rule has "condition" section, add meta, rename and add it to results, else discard it + if current_rule.condition: + generated_rules.append(current_rule) + current_rule = MISPRuleTemplate() + return generated_rules, asis_valid_rules, asis_broken_rules + + +# =========================== ATTR HANDLERS CORE =============================== +class MISPRuleTemplate(YaraRuleTemplate): + + def __init__(self, rulename=None): + super().__init__(rulename) + self._attributes_count = 0 + + @classmethod + def 
from_yara_attr(cls, mispattr): + rules = cls.from_source(mispattr['value']) + for rule in rules: + rule._enrich(mispattr) + return rules + + def add_attribute(self, mispattr, options): + opts = { + 'chaining_op': 'or', + } + opts.update(options) + self._handle(mispattr, opts) + self._attributes_count += 1 + event_only = False + if 'event_uuid_only' in opts and opts['event_uuid_only']: + event_only = True + self._enrich(mispattr, event_uuid_only=event_only) + self._generate_name(mispattr) + return self + + def _enrich(self, attr, event=None, event_uuid_only=False): + if not event and 'Event' in attr: + event = attr['Event'] + # META: + # attribute uuids + if not event_uuid_only: + uuid_meta = '{} ({})'.format(attr['uuid'], attr['type']) + self.add_meta('MISP_UUID', uuid_meta) + # event uuids + if event: + self.add_meta('MISP_EVENT_UUID', event['uuid']) + self.add_meta('MISP_EVENT_INFO', event['info']) + # other META and TAGS: + if self.loaded_from_source: + self.add_tag('as_is') + if self.autofixed: + self.add_tag('repaired') + origin_msg = 'Loaded from a corrupted Yara attribute, '\ + + 'automatically repaired.'\ + + 'Some comments may have been removed by parser. '\ + + 'Rule may be unreliable.' + self.add_meta('MISP_ORIGIN', origin_msg) + self.add_meta('MISP_FIX_NOTES', self.autofixed_comment) + else: + self.add_tag('valid') + validity_msg = 'Loaded as-is from a Yara attribute. ' \ + + 'Some comments may have been removed by parser.' 
+ self.add_meta('MISP_ORIGIN', validity_msg) + else: + self.add_tag('generated') + self.add_meta('MISP_ORIGIN', 'Automatically generated ' \ + + 'from non-Yara attribute(s)') + return self + + def _generate_name(self, attr): + if self.loaded_from_source: + pass + elif self._attributes_count == 1: + name = 'MISP_ATTRIBUTE_{}'.format(attr['uuid']) + self.set_name(name) + else: + rand_id = str(uuid.uuid4()).replace('-', '') + name = 'MISP_MULTI_ATTRIBUTES_{}'.format(rand_id) + self.set_name(name) + return self + + def attr_count(self): + return self._attributes_count + + def _handle(self, attr, opts): + attr_type = attr['type'] + handler = self._get_type_handler(attr_type) + if handler: + handler(attr, opts) + return self + +# =========================== ATTR HANDLERS ==================================== + def _get_type_handler(self, attr_type): + handlers = { + 'md5': self._md5, + 'sha1': self._sha1, + 'sha256': self._sha256, + # 'filename': self._filename, # unsupported by yara + 'filename|md5': self._filename_md5, + 'filename|sha1': self._filename_sha1, + 'filename|sha256': self._filename_sha256, + 'ip-src': self._ip_src, + 'ip-dst': self._ip_dst, + 'hostname': self._hostname, + 'domain': self._domain, + 'domain|ip': self._domain_ip, + 'email-src': self._email_src, + 'email-dst': self._email_dst, + 'email-subject': self._email_subject, + 'email-body': self._email_body, + 'url': self._url, + 'regkey': self._regkey, + 'regkey|value': self._regkey_value, + 'pattern-in-file': self._pattern_in_file, + 'pattern-in-traffic': self._pattern_in_traffic, + 'pattern-in-memory': self._pattern_in_memory, + # 'yara': self._yara, # specific case, see _yara2yaras() + 'cookie': self._cookie, + 'vulnerability': self._vulnerability, + 'text': self._text, + 'hex': self._hex, + 'named pipe': self._named_pipe, + 'mutex': self._mutex, + 'btc': self._btc, + 'xmr': self._xmr, + 'uri': self._uri, + # 'authentihash': self._authentihash, # unsupported by yara + # 'ssdeep': self._ssdeep, # 
unsupported by yara + 'imphash': self._imphash, + # 'pehash': self._pehash, # unsupported by yara + # 'impfuzzy': self._impfuzzy, # unsupported by yara + # 'sha224': self._sha224, # unsupported by yara + # 'sha384': self._sha384, # unsupported by yara + # 'sha512': self._sha512, # unsupported by yara + # 'sha512/224': self._sha512_224, # unsupported by yara + # 'sha512/256': self._sha512_256, # unsupported by yara + # 'tlsh': self._tlsh, # unsupported by yara + # 'cdhash': self._cdhash, # unsupported by yara + # 'filename|authentihash': self._filename_authentihash, # unsupported by yara + # 'filename|ssdeep': self._filename_ssdeep, # unsupported by yara + 'filename|imphash': self._filename_imphash, + # 'filename|impfuzzy': self._filename_impfuzzy, # unsupported by yara + # 'filename|pehash': self._filename_pehash, # unsupported by yara + # 'filename|sha224': self._filename_sha224, # unsupported by yara + # 'filename|sha384': self._filename_sha384, # unsupported by yara + # 'filename|sha512': self._filename_sha512, # unsupported by yara + # 'filename|sha512/224': self._filename_sha512_224, # unsupported by yara + # 'filename|sha512/256': self._filename_sha512_256, # unsupported by yara + # 'filename|tlsh': self._filename_tlsh, # unsupported by yara + 'windows-scheduled-task': self._windows_scheduled_task, + 'windows-service-name': self._windows_service_name, + 'windows-service-displayname': self._windows_service_displayname, + # 'x509-fingerprint-sha1': self._x509_fingerprint_sha1, # TODO check if doable + # 'x509-fingerprint-md5': self._x509_fingerprint_md5, # TODO check if doable + # 'x509-fingerprint-sha256': self._x509_fingerprint_sha256, # TODO check if doable + # 'size-in-bytes': self._size_in_bytes, # too many false positives + 'ip-dst|port': self._ip_dst_port, + 'ip-src|port': self._ip_src_port, + 'hostname|port': self._hostname_port, + 'email-dst-display-name': self._email_dst_display_name, + 'email-src-display-name': self._email_src_display_name, + 
'email-header': self._email_header, + 'email-reply-to': self._email_reply_to, + 'email-x-mailer': self._email_x_mailer, + 'email-mime-boundary': self._email_mime_boundary, + 'email-thread-index': self._email_thread_index, + 'email-message-id': self._email_message_id, + 'github-username': self._github_username, + 'github-repository': self._github_repository, + 'github-organisation': self._github_organisation, + 'mobile-application-id': self._mobile_application_id, + + 'user-agent': self._user_agent, + } + if attr_type in handlers: + return handlers[attr_type] + else: + return None + + def __generic_string(self, value, opts): + self.strings_text(None, value, + escape_newlines=True, + nocase=False, + ascii=True, + wide=True, + xor=False, + fullword=False) + return self + + def _md5(self, attr, opts): + filehash = attr['value'] + self.add_module_dependency('hash') + self.or_condition('hash.md5(0, filesize) == "{}"'.format(filehash)) + return self + + def _sha1(self, attr, opts): + filehash = attr['value'] + self.add_module_dependency('hash') + self.or_condition('hash.sha1(0, filesize) == "{}"'.format(filehash)) + return self + + def _sha256(self, attr, opts): + filehash = attr['value'] + self.add_module_dependency('hash') + self.or_condition('hash.sha256(0, filesize) == "{}"'.format(filehash)) + return self + + # def _filename(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + + def _filename_md5(self, attr, opts): + filename, _, filehash = attr['value'].rpartition('|') + self.add_module_dependency('hash') + self.or_condition('hash.md5(0, filesize) == "{}"'.format(filehash)) + return self + + def _filename_sha1(self, attr, opts): + filename, _, filehash = attr['value'].rpartition('|') + self.add_module_dependency('hash') + self.or_condition('hash.sha1(0, filesize) == "{}"'.format(filehash)) + return self + + def _filename_sha256(self, attr, opts): + filename, _, filehash = attr['value'].rpartition('|') + 
self.add_module_dependency('hash') + self.or_condition('hash.sha256(0, filesize) == "{}"'.format(filehash)) + return self + + def _ip_src(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _ip_dst(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _hostname(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _domain(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _domain_ip(self, attr, opts): + domain, _, ip = attr['value'].rpartition('|') + self.__generic_string(domain, opts) + self.__generic_string(ip, opts) + return self + + def _email_src(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_dst(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_subject(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_body(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _url(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _regkey(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _regkey_value(self, attr, opts): + regkey, _, regvalue = attr['value'].rpartition('|') + self.__generic_string(regkey, opts) + self.__generic_string(regvalue, opts) + return self + + def _pattern_in_file(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _pattern_in_traffic(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _pattern_in_memory(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _yara(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _cookie(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _vulnerability(self, attr, opts): + 
self.__generic_string(attr['value'], opts) + return self + + def _text(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _hex(self, attr, opts): + self.strings_hex(None, attr['value']) + return self + + def _named_pipe(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _mutex(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _btc(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _xmr(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _uri(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + # def _authentihash(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _ssdeep(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + + def _imphash(self, attr, opts): + filehash = attr['value'] + self.add_module_dependency('pe') + self.or_condition('pe.imphash() == "{}"'.format(filehash)) + return self + + def _filename_imphash(self, attr, opts): + filename, _, filehash = attr['value'].rpartition('|') + self.add_module_dependency('pe') + self.or_condition('pe.imphash() == "{}"'.format(filehash)) + return self + + # def _filename_impfuzzy(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _filename_pehash(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _filename_sha224(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _filename_sha384(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _filename_sha512(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _filename_sha512_224(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _filename_sha512_256(self, 
attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _filename_tlsh(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + + def _windows_scheduled_task(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _windows_service_name(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _windows_service_displayname(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + # TODO: check if that can be implemented + # def _x509_fingerprint_sha1(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _x509_fingerprint_md5(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + # def _x509_fingerprint_sha256(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + + # TODO: too many false-positives but could be OK in objects + # def _size_in_bytes(self, attr, opts): + # self.__generic_string(attr['value'], opts) + # return self + # + + # likely false positives on ports, also can't guess ip:port format. 
+ # Ignoring port + def _ip_dst_port(self, attr, opts): + ip, _, port = attr['value'].rpartition('|') + self.__generic_string(ip, opts) + return self + + def _ip_src_port(self, attr, opts): + ip, _, port = attr['value'].rpartition('|') + self.__generic_string(ip, opts) + return self + + def _hostname_port(self, attr, opts): + host, _, port = attr['value'].rpartition('|') + self.__generic_string(host, opts) + return self + + def _email_dst_display_name(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_src_display_name(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_header(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_reply_to(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_x_mailer(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_mime_boundary(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_thread_index(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _email_message_id(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _github_username(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _github_repository(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _github_organisation(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _mobile_application_id(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self + + def _user_agent(self, attr, opts): + self.__generic_string(attr['value'], opts) + return self diff --git a/app/files/scripts/yara/permissive_plyara.py b/app/files/scripts/yara/permissive_plyara.py new file mode 100644 index 000000000..591aa35d0 --- /dev/null +++ 
b/app/files/scripts/yara/permissive_plyara.py @@ -0,0 +1,157 @@ +import plyara +from plyara.exceptions import ParseError +from plyara import * +from unittest import mock +import re +import string + +PERMISSIVE_MODE = True # set to False to use the regular, strict, plyara parser for debugging + + +_original_match = re.match +def _multiline_match(pattern, string, flags=0): + return _original_match(pattern, string, flags=flags | re.DOTALL | re.MULTILINE) + + +class _MultilinePlyara(plyara.Plyara): + + def parse_string(self, input_string): + with mock.patch.object(re, 'match', _multiline_match): + return super(plyara.Plyara, self).parse_string(input_string) + + +class PermissivePlyara(): + + def parse_string(self, input_string): + try: + if PERMISSIVE_MODE: + return self._permissive_parse_string(input_string) + else: + return plyara.Plyara().parse_string(input_string) + except ParseError as e: + raise + # some errors are not properly caught by plyara + # convert everything to ParseError to avoid uncatchable crashes + except Exception as e: + raise ParseError('Uncaught plyara exception ({}): {}'.format(type(e).__name__, str(e)), None, None) + + def _permissive_parse_string(self, input_string, fix_notes=None, original_error=None): + if not fix_notes: + fix_notes = set() + # with mock.patch.object(re, 'match', overridden_match): + try: + # res = super(Plyara, self).parse_string(input_string) # weird failures, couldn't debug. possibly due to internal state + # re-instanciating playra to avoid internal state errors + res = _MultilinePlyara().parse_string(input_string) + if fix_notes: + for r in res: + r['permissive_plyara_fixed'] = True + r['permissive_plyara_comment'] = '. 
'.join(fix_notes) + return res + except ParseError as e: + if not original_error: + original_error = e + str_error = str(e) + fixed = input_string + if str_error.startswith('Illegal character') and any(elem in str_error for elem in '”“″'): + fixed = _fix_quotes(input_string) + fix_notes.add('Wrong quotes characters') + elif str_error.startswith('Unknown text Rule'): + fixed = _fix_capital(input_string) + fix_notes.add('Rule => rule') + elif str_error.startswith('Illegal character'): + fixed = _fix_illegal_chars(input_string) + fix_notes.add('Illegal characters') + elif str_error.startswith('Unknown text { for token of type LBRACE') \ + and input_string.lstrip().startswith \ + and input_string.rstrip().endswith('}'): + fixed = _fix_noname(input_string) + fix_notes.add('Missing rule name') + elif re.match(r'Unknown text\s?_\s?for token of type ID', str_error): + fixed = _fix_spaced_underscores(input_string) + fix_notes.add("' _ ' => '_'") + else: + fixed = _fix_magic(input_string) + fix_notes.add('Magic fix (highly unreliable)') + + if fixed != input_string: + return self._permissive_parse_string(fixed, fix_notes, original_error) + else: + raise original_error + # best_error = 'BEST GUESS ERROR: {}\n'.format(str(e)) + # best_guess = 'BEST GUESS: \n{}'.format(input_string) + # raise ParseError(best_error+best_guess, None, None) from e + + +def _fix_quotes(yara_src): + repaired = yara_src + repaired = repaired.replace('”', '"') + repaired = repaired.replace('“', '"') + repaired = repaired.replace('″', '"') + return repaired + +def _fix_capital(yara_src): + repaired = yara_src.replace('Rule', 'rule') + return repaired + +def _fix_illegal_chars(yara_src): + repaired = ''.join(filter(lambda x: x in string.printable, yara_src)) + return repaired + +def _fix_noname(yara_src): + repaired = 'rule UnnamedRule ' + yara_src + return repaired + +def _fix_spaced_underscores(yara_src): + repaired = yara_src.replace(' _ ', '_') + return repaired + +def _fix_magic(yara_src): + 
repaired = '' + for line in yara_src.splitlines(): + if '//' not in line: + repaired += line + else: + repaired += '\n{}\n'.format(line) + return repaired if repaired else yara_src + + +# Keeping this code for later as it contains more advanced fixes + +# def _try_simple_repairs(yara_src, error): +# reasons = [] +# # common quotes error +# repaired = yara_src +# repaired = repaired.replace('”', '"') +# repaired = repaired.replace('“', '"') +# repaired = repaired.replace('″', '"') +# if repaired != yara_src: +# reasons.append('wrong quotes characters') +# # missing rule declaration +# if repaired.strip().startswith('{'): +# reasons.append('missing rule name') +# rulename = 'UnnamedRule' +# repaired = 'rule {} {}'.format(rulename, repaired) +# # capital letter rule declaration +# if repaired.strip().startswith('Rule'): +# reasons.append('Rule => rule') +# repaired = repaired.replace('Rule', 'rule') +# if 'Illegal character' in str(error): +# repaired = ''.join(filter(lambda x: x in string.printable, repaired)) +# reasons.append('illegal characters') +# # badly formated declaration: +# # check if rule matches format : DATA rule DECLARATION { CONTENT } DATA +# split_source = re.split(r'rule\s(.*?){(.*)}', repaired, flags=re.MULTILINE|re.DOTALL) +# if len(split_source) == 4: +# split_source = {'pre': split_source[0], +# 'declaration': split_source[1].replace(' ', '').rstrip().rstrip(':'), +# 'content': split_source[2], +# 'post': split_source[3]} +# quoted_content = re.split(r'\"(.+?)\"', split_source['content'], re.MULTILINE | re.DOTALL) +# nolinebreak_content = '' +# for chunk in quoted_content: # remove line breaks in strings and meta +# if chunk.startswith('"') and chunk.endswith('"'): +# nolinebreak_content += ''.join(chunk.splitlines()) +# reassembled = 'rule {} {{ {} }}'.format(split_source['declaration'], split_source['content']) +# repaired = reassembled +# return repaired, reasons diff --git a/app/files/scripts/yara/yaraexport.py 
def rules2json_export(rules, extra_comment=''):
    """Serialize a list of rules into a JSON array string.

    :param rules: iterable of rule objects (templates or raw broken sources).
    :param extra_comment: accepted for interface compatibility; currently unused.
    :return: JSON text of a list with one dict per rule (see rule2json_export).
    """
    return json.dumps([rule2json_export(r) for r in rules])


def rule2json_export(rule, extra_comment=''):
    """Build the JSON-serializable dict describing a single rule.

    :param rule: a MISPRuleTemplate, or any other object for a rule that could
                 not be parsed or repaired (it is exported through str()).
    :param extra_comment: accepted for interface compatibility; currently unused.
    :return: dict with the keys 'value', 'comment' and 'valid'.
    """
    json_dict = {
        'value': str(rule),
        'comment': '',
        'valid': None
    }
    if not isinstance(rule, MISPRuleTemplate):
        json_dict['comment'] += 'Broken yara attribute. Could not parse or repair.'
        json_dict['valid'] = False
        return json_dict
    if rule.loaded_from_source:
        json_dict['comment'] += 'Loaded from source. '
    else:
        json_dict['comment'] += 'Generated. '
    if rule.autofixed:
        json_dict['comment'] += 'May be unreliable due to automatic repairs: '
        json_dict['comment'] += rule.autofixed_comment
    json_dict['valid'] = True
    return json_dict


def file_is_empty(path):
    """Return True when the file at `path` has a size of zero bytes."""
    return os.stat(path).st_size == 0


def output_json(output_path, output_rules):
    """Append rules to `output_path` as comma-separated JSON objects.

    The surrounding '[' and ']' are stripped from the serialized list so that
    several calls can append to the same file. A ',' separator is written
    only when the file already holds data AND there is something to append;
    an empty batch therefore never leaves a dangling comma behind.

    :param output_path: file to append to (created if missing).
    :param output_rules: list of rules to serialize.
    """
    to_write = rules2json_export(output_rules)[1:-1]
    # The file is opened even for an empty batch so it is always created.
    with open(output_path, 'a+', encoding='utf-8') as f:
        if not to_write:
            return
        if not file_is_empty(output_path):
            f.write(',')
        f.write(to_write)


def output_raw(output_path, output_rules):
    """Append the textual form of the rules to `output_path`.

    Rules are separated by a blank line, both inside one batch and between
    successive batches appended to the same file.

    :param output_path: file to append to (created if missing).
    :param output_rules: list of rules, each exported through str().
    """
    to_write = '\n\n'.join([str(r) for r in output_rules])
    # The file is opened even for an empty batch so it is always created.
    with open(output_path, 'a+', encoding='utf-8') as f:
        if not to_write:
            return
        if not file_is_empty(output_path):
            # keep rules of consecutive batches from being glued together
            f.write('\n\n')
        f.write(to_write)


def _collect_rules(loaded):
    """Convert the decoded 'response' payload into three rule lists.

    :param loaded: either a dict holding an 'Attribute' list, or a list of
                   dicts each holding an 'Event'; None when the input was empty.
    :return: tuple (generated, asis_valid, asis_broken).
    :raises Exception: when the payload is empty or has neither shape.
    """
    if loaded is None:
        raise Exception('Input file is empty, nothing to export')
    if isinstance(loaded, dict) and 'Attribute' in loaded:
        generated, asis_valid, asis_broken = mispattrs2yara(loaded['Attribute'])
        return generated, asis_valid, asis_broken
    if isinstance(loaded, list):
        generated = []
        asis_valid = []
        asis_broken = []
        for event_dict in loaded:
            if 'Event' not in event_dict:
                raise Exception('Json doesn\'t seem to be an list of attributes or events')
            curr_generated, curr_asis_valid, curr_asis_broken = mispevent2yara(event_dict['Event'])
            generated += curr_generated
            asis_valid += curr_asis_valid
            asis_broken += curr_asis_broken
        return generated, asis_valid, asis_broken
    raise Exception('Json doesn\'t seem to be an list of attributes or events')


if __name__ == "__main__":
    parser = OptionParser()
    parser.add_option("-i", "--input", dest="in_file",
                      help="input file", metavar="FILE")
    parser.add_option("-g", "--out-generated", dest="out_gen",
                      help="output for generated rules", metavar="FILE")
    parser.add_option("-a", "--out-asis", dest="out_asis",
                      help="output for as-is rules", metavar="FILE")
    parser.add_option("-r", "--raw",
                      action="store_true", dest="raw_output", default=False,
                      help="outputs raw yara rules instead of json-structured rules")
    (options, args) = parser.parse_args()

    in_path = options.in_file
    out_path_gen = options.out_gen
    out_path_asis = options.out_asis
    raw_mode = options.raw_output

    loaded = None
    with open(in_path, 'r', encoding='utf-8') as in_file:
        content = in_file.read()
    # whitespace-only input is treated like an empty file
    if content.strip():
        loaded = json.loads(content)['response']
    generated, asis_valid, asis_broken = _collect_rules(loaded)
    if raw_mode:
        output_raw(out_path_gen, generated)
        output_raw(out_path_asis, asis_valid + asis_broken)
    else:
        output_json(out_path_gen, generated)
        output_json(out_path_asis, asis_valid + asis_broken)
class YaraTemplateException(Exception):
    """Base error of this module; optionally carries the offending yara source."""

    def __init__(self, message, source=None):
        super().__init__(message)
        self.source = source


class YaraLexerException(YaraTemplateException):
    """Raised when the yara source cannot be parsed."""
    pass


class YaraTemplateRuleConflictException(YaraTemplateException):
    pass


class YaraTemplateRuleDependencyException(YaraTemplateException):
    pass


class YaraRuleTemplate:
    """In-memory representation of one yara rule that can be rendered with str().

    A template is either built programmatically (set_name, add_meta,
    strings_*, *_condition, ...) or loaded from existing yara source through
    from_source(). Builder methods return self so calls can be chained.
    """

    class _YaraStringsItem:
        """One entry of the 'strings' section, stored in its final yara syntax."""

        def __init__(self, stringstype, name, value,
                     modifiers, force_escape=True):
            """
            :param stringstype: 'byte', 'text' or 'regex'.
            :param name: string identifier; a leading '$' is added if missing.
            :param value: raw value, without its yara delimiters.
            :param modifiers: list of yara modifiers (e.g. 'nocase').
            :param force_escape: escape the value for its type; disabled for
                                 values that already come from yara source.
            """
            if not name.startswith('$'):
                name = '${}'.format(name)
            if stringstype == 'byte':
                value = '{{ {} }}'.format(value)
            elif stringstype == 'text':
                if force_escape:
                    value = yara_escape_str(value)
                value = '"{}"'.format(value)
            elif stringstype == 'regex':
                if force_escape:
                    # Escape every '/' not already preceded by a backslash.
                    # The negative lookbehind also matches at position 0, so
                    # a leading '/' cannot turn the regex into a '//' comment.
                    value = re.sub(r'(?<!\\)/', r'\\/', value)
                value = '/{}/'.format(value)
            self.stringstype = stringstype
            self.name = name
            self.value = value
            self.modifiers = modifiers

        def __str__(self):
            name = self.name
            value = self.value
            modifiers = ' '.join(self.modifiers)
            return "{} = {} {}".format(name, value, modifiers)

    def __init__(self, rulename):
        self.rulename = rulename
        self.ruletags = set()
        self.rulescopes = set()  # can be empty, 'global' or 'private'
        self.meta = set()  # set of (key, escaped value) tuples
        self.strings = []  # list instead of name=>value dict because of anonymous strings
        self.condition = ''
        self.file_dependencies = []
        self.rule_dependencies = []
        self.module_dependencies = []
        self.loaded_from_source = False
        self.autofixed = False
        self.autofixed_comment = ''

    @classmethod
    def from_source(cls, yara_source):
        """Parse yara source text into a list of templates (one per rule).

        :param yara_source: yara source; non-str values are converted with str().
        :return: list of templates.
        :raises YaraLexerException: when the parser reports a ParseError.
        :raises YaraTemplateException: when a parsed rule cannot be converted;
                 the exception's `source` is set to the full input.
        """
        if not isinstance(yara_source, str):
            yara_source = str(yara_source)
        try:
            plyara_parsed = PermissivePlyara().parse_string(yara_source)
        except ParseError as e:
            raise YaraLexerException(str(e), yara_source)
        rules = []
        try:
            for plyara_rule in plyara_parsed:
                rule = cls._from_plyara(plyara_rule)
                rules.append(rule)
            return rules
        except YaraTemplateException as e:
            e.source = yara_source
            raise

    # Creates a YaraRuleTemplate from plyara's array output format
    @classmethod
    def _from_plyara(cls, plyara_out):
        plyara_out = cls._ensure_one_rule(plyara_out)
        rule = cls(plyara_out['rule_name'])
        rule.loaded_from_source = True
        if 'tags' in plyara_out:
            rule.ruletags.update(plyara_out['tags'])
        if 'scopes' in plyara_out:
            rule.rulescopes.update(plyara_out['scopes'])
        if 'metadata' in plyara_out:
            for m in plyara_out['metadata']:
                for k, v in m.items():
                    rule.add_meta(k, v)
        if 'strings' in plyara_out:
            for s in plyara_out['strings']:
                s_modifiers = s['modifiers'] if 'modifiers' in s else []
                if s['type'] == 'byte' or s['type'] == 'regex':
                    # drop the surrounding delimiters, they are added back
                    # when the string item is built
                    value = s['value'][1:-1]
                else:
                    value = s['value']
                rule._strings(s['type'], s['name'], value, s_modifiers)
        if 'raw_condition' in plyara_out:
            # Keep everything after the first 'condition:' header. Whitespace
            # before the colon is tolerated; if the header is not found the
            # whole raw text is kept instead of failing on unpacking.
            parts = re.split(r'condition\s*:', plyara_out['raw_condition'],
                             maxsplit=1)
            rule.condition = parts[-1]
            # parsing conditions is too tricky and prone to errors
            # rule.condition = " ".join(plyara_out['condition_terms'])
        else:
            return rule  # stop and return to avoid uncaught plyara exceptions
        if 'includes' in plyara_out:
            rule.file_dependencies = plyara_out['includes']
        rule.rule_dependencies = plyara.utils.detect_dependencies(plyara_out)
        rule.module_dependencies = plyara.utils.detect_imports(plyara_out)
        if 'permissive_plyara_fixed' in plyara_out \
                and plyara_out['permissive_plyara_fixed']:
            rule.autofixed = True
            if 'permissive_plyara_comment' in plyara_out:
                rule.autofixed_comment = plyara_out['permissive_plyara_comment']
        return rule

    def __str__(self):
        """Render the rule as yara source.

        Includes and imports are de-duplicated in insertion order, scopes and
        tags are sorted, so the same template always renders the same text.
        """
        includes = dict.fromkeys(self.file_dependencies)
        imports = dict.fromkeys(self.module_dependencies)
        includes_str = '\n'.join(['include "{}"'.format(i) for i in includes])
        imports_str = '\n'.join(['import "{}"'.format(i) for i in imports])
        scopes = (' '.join(sorted(self.rulescopes)) + ' ') if self.rulescopes else ''
        tags_str = (' : ' + ' '.join(sorted(self.ruletags))) if self.ruletags else ''
        declaration = '{}rule {}{}'.format(scopes, self.rulename, tags_str)
        meta_section = ''
        strings_section = ''
        condition_section = ''
        if self.meta:
            sorted_meta = sorted(self.meta)
            meta_section += '\tmeta:'
            for (m, v) in sorted_meta:
                meta_section += '\n\t\t{} = "{}"'.format(m, v)
            meta_section += '\n'
        if self.strings:
            strings_section += '\tstrings:'
            for s in self.strings:
                strings_section += '\n\t\t{}'.format(s)
            strings_section += '\n'
        if self.condition:
            condition_section += '\tcondition:'
            for cond_line in self.condition.splitlines():
                stripped = cond_line.strip()
                if stripped:
                    condition_section += '\n\t\t{}'.format(stripped)
        result = '{}\n{}\n{}\n{{\n{}{}{}\n}}'.format(includes_str,
                                                   imports_str,
                                                   declaration,
                                                   meta_section,
                                                   strings_section,
                                                   condition_section)
        if not self.condition:
            result = '// this rule will not compile (mandatory "condition" section missing)\n{}'.format(result)
        return result

    def add_meta(self, meta_key, meta_value):
        """Add a 'meta' entry; the value is converted to str and escaped."""
        # remove illegal characters (same filter as "strings" entries)
        meta_value = yara_escape_str(str(meta_value))
        self.meta.add((meta_key, meta_value))
        return self

    def set_name(self, name):
        """Set the rule name after sanitizing it into a valid identifier.

        Forbidden characters become '_'; a '_' prefix is added when the name
        is empty or starts with a digit.
        """
        # replace forbidden characters with '_'
        name = re.sub(r'[^A-Za-z0-9_]', '_', name)
        if not name or name[0].isdigit():
            name = '_{}'.format(name)
        self.rulename = name
        return self

    def add_tag(self, tag):
        """Add a rule tag, sanitized with the same rules as set_name."""
        # replace forbidden characters with '_'
        tag = re.sub(r'[^A-Za-z0-9_]', '_', tag)
        if not tag or tag[0].isdigit():
            tag = '_{}'.format(tag)
        self.ruletags.add(tag)
        return self

    def set_condition(self, condition_expression):
        """Replace the whole condition with `condition_expression`."""
        self.condition = condition_expression
        return self

    def and_condition(self, condition_expression):
        """Append `condition_expression` to the condition with 'and'."""
        if not self.condition:
            self.condition = '{}'.format(condition_expression)
        else:
            self.condition = '{}\n and {}'.format(self.condition,
                                                  condition_expression)
        return self

    def or_condition(self, condition_expression):
        """Append `condition_expression` to the condition with 'or'."""
        if not self.condition:
            self.condition = '{}'.format(condition_expression)
        else:
            self.condition = '{}\n or {}'.format(self.condition,
                                                 condition_expression)
        return self

    # Adds an entry to the 'strings' section
    # str_type can be 'byte', 'text' or 'regex'
    # name could be None for anonymous strings
    def _strings(self, str_type, name, value, modifiers):
        if name == '$' or not name:
            name = '$'
        # values loaded from yara source are already escaped
        force_escape = False if self.loaded_from_source else True
        str_entry = self._YaraStringsItem(str_type, name, value, modifiers, force_escape)
        if str_entry.name == '$' or str_entry.name not in (o.name for o in self.strings):
            self.strings.append(str_entry)
        else:
            raise YaraTemplateException(
                'There is already a string named "{}"'.format(str_entry.name))
        return self

    # adds a 'byte' entry ({}) to strings section (no modifiers)
    def strings_hex(self, name, value):
        self._strings('byte', name, value, [])
        return self

    # adds a 'text' entry ("") to strings section (default: nocase ascii wide)
    def strings_text(self, name, value, escape_newlines=True, nocase=True,
                     ascii=True, wide=True, xor=False, fullword=False):
        """Add a text string; multi-line values get special handling.

        With escape_newlines=True a multi-line value becomes a single regex
        entry whose line breaks are system-agnostic. With escape_newlines=False
        each line becomes its own text entry: anonymous strings stay
        anonymous, named ones get an '_<index>' suffix so the names stay
        unique.
        """
        modifiers = []
        if nocase:
            modifiers.append('nocase')
        if ascii:
            modifiers.append('ascii')
        if wide:
            modifiers.append('wide')
        if xor:
            modifiers.append('xor')
        if fullword:
            modifiers.append('fullword')
        lines = value.splitlines()
        if escape_newlines and len(lines) > 1:
            # only regex supports system-agnostic line breaks
            value = _str2yara_regex(value)
            self._strings('regex', name, value, modifiers)
        elif len(lines) > 1:
            anonymous = (not name) or name == '$'
            for index, line in enumerate(lines):
                line_name = name if anonymous else '{}_{}'.format(name, index)
                self._strings('text', line_name, line, modifiers)
                # TODO: improvement: group lines with 'all of $*'
                # instead of (\r|\r\n|\n|\x1E)
        else:
            self._strings('text', name, value, modifiers)
        return self

    # adds a 'regex' entry (//) to strings section (default: nocase ascii wide)
    def strings_regex(self, name, value, nocase=True, ascii=True,
                      wide=True, fullword=False):
        modifiers = []
        if nocase:
            modifiers.append('nocase')
        if ascii:
            modifiers.append('ascii')
        if wide:
            modifiers.append('wide')
        if fullword:
            modifiers.append('fullword')
        self._strings('regex', name, value, modifiers)
        return self

    # adds an 'include' statement
    def add_file_dependency(self, file_name):
        if file_name not in self.file_dependencies:
            self.file_dependencies.append(file_name)
        return self

    # adds a rule dependency, useful to determine the order in a group of rules
    def add_rule_dependency(self, rule_name):
        if rule_name not in self.rule_dependencies:
            self.rule_dependencies.append(rule_name)
        return self

    # adds an 'import' dependency
    def add_module_dependency(self, module_name):
        if module_name not in self.module_dependencies:
            self.module_dependencies.append(module_name)
        return self

    @staticmethod
    def _ensure_one_rule(plyara_output):
        """Return the single parsed rule, unwrapping a one-element list.

        :raises YaraTemplateException: when a list does not hold exactly one rule.
        """
        if isinstance(plyara_output, list):
            if len(plyara_output) != 1:
                error_msg = 'Single rule expected, string contains {} rules'.format(
                    len(plyara_output))
                raise YaraTemplateException(error_msg)
            else:
                return plyara_output[0]
        else:
            return plyara_output


# =============== Tools ===================

# replaces special characters in yara 'text' strings
def yara_escape_str(pattern):
    """Escape backslash, double quote, newline, tab and carriage return.

    Carriage return is written as the hex escape \\x0d so that the resulting
    string still designates the CR byte.
    """
    _special_chars_map = {
        ord('\\'): '\\\\',
        ord('"'): '\\"',
        ord('\n'): '\\n',
        ord('\t'): '\\t',
        ord('\r'): '\\x0d'
    }
    return pattern.translate(_special_chars_map)


# helps convert a python string to a yara 'regex' string, escapes special chars
# handles newlines by making them system-agnostic and optional
def _str2yara_regex(pattern):
    _special_chars_map = {

        ord('/'): '\\/',
        # covers '\' and all escapes not valid in python but valid in yara:
        # \w \W \s \S \d \D \B
        ord('\\'): '\\\\',
        ord('^'): '\\^',
        ord('$'): '\\$',
        ord('|'): '\\|',
        ord('('): '\\(',
        ord(')'): '\\)',
        ord('['): '\\[',
        ord(']'): '\\]',
        # '.' must be escaped too, otherwise it matches any character
        ord('.'): '\\.',

        ord('*'): '\\*',
        ord('+'): '\\+',
        ord('?'): '\\?',
        ord('{'): '\\{',
        ord('}'): '\\}',

        ord('\t'): '\\t',
        ord('\f'): '\\f',
        ord('\a'): '\\a',
        # covers \r\n \r \n and other exotic line breaks (\x1E); the
        # two-byte \r\n alternative comes first so it is tried before \r
        ord('\n'): '(\\x0D\\x0A|\\x0D|\\x0A|\\x1E)?',

        # backspace as a hex escape: '\b' is a word boundary in a regex
        ord('\b'): '\\x08'
    }
    # normalize every kind of line break to '\n' before translating
    pattern = '\n'.join(pattern.splitlines())
    return pattern.translate(_special_chars_map)