Yara export

pull/4543/head
edhoedt 2019-04-29 19:23:14 +02:00
parent e86b161d93
commit b9463e513c
9 changed files with 1314 additions and 0 deletions

View File

@ -0,0 +1,141 @@
<?php
App::uses('JsonExport', 'Export');
/**
 * Exports MISP events/attributes as Yara rules.
 *
 * Data handed to handler() is serialised with JsonExport into a temporary
 * input file. Whenever the number of buffered attributes reaches
 * $__MAX_n_attributes (and once more in footer()), the Python converter
 * script is run on that file; it appends its results to two temporary output
 * files ("generated" rules and "as-is" rules), which footer() finally reads,
 * deletes and returns.
 */
class YaraExport
{
    // Python converter that turns one JSON batch into Yara rules.
    private $__script_path = APP . 'files/scripts/yara/yaraexport.py';
    // Directory holding the temporary batch/input and output files.
    private $__tmp_dir = APP . 'tmp/yara/';
    // stderr of the converter is appended to this log file.
    private $__log_path = APP . 'tmp/logs/yara_export.log';
    // Attributes buffered in the current input file.
    private $__n_attributes = 0;
    // Buffered attribute count that triggers a conversion run.
    private $__MAX_n_attributes = 15000;
    private $__yara_file_gen = null;
    private $__yara_file_asis = null;
    private $__curr_input_file = null;
    private $__scope = false;
    private $__curr_input_is_empty = true;
    private $__JsonExporter = false;
    // true: plain Yara text output; false: JSON-structured output ("yara-json").
    private $__raw_mode = true;
    public $non_restrictive_export = true;

    /**
     * Counts the attributes directly attached to an event.
     *
     * Attributes nested in objects are not counted (that was already disabled
     * in the original implementation).
     *
     * @param mixed $data event data as passed to handler()
     * @return int number of entries under the 'Attribute' key, 0 when absent
     */
    private static function __count_atributes($data)
    {
        if (empty($data['Attribute']) || !is_array($data['Attribute'])) {
            return 0;
        }
        return count($data['Attribute']);
    }

    /**
     * Prepares the temporary files and selects the output mode.
     *
     * @param array $options export options; 'returnFormat' === 'yara-json' switches to JSON output
     * @return string always '' (the real output is produced by footer())
     */
    public function header($options = array())
    {
        if ($this->__JsonExporter === false) {
            $this->__JsonExporter = new JsonExport();
        }
        $this->__initialize_yara_file();
        $this->__initialize_misp_file($options);
        if (isset($options['returnFormat']) && $options['returnFormat'] === 'yara-json') {
            $this->__raw_mode = false;
        }
        return '';
    }

    /**
     * Creates the two (empty) output files the converter appends to.
     */
    private function __initialize_yara_file()
    {
        $yaraFileName = $this->generateRandomFileName();
        $this->__yara_file_gen = new File($this->__tmp_dir . $yaraFileName . '_generated', true, 0644);
        $this->__yara_file_asis = new File($this->__tmp_dir . $yaraFileName . '_asis', true, 0644);
        $this->__yara_file_gen->close();
        $this->__yara_file_asis->close();
    }

    /**
     * Starts a fresh input (batch) file beginning with the JSON export header.
     *
     * @param array $options export options forwarded to JsonExport::header()
     */
    private function __initialize_misp_file($options)
    {
        $mispFileName = $this->generateRandomFileName();
        $this->__curr_input_file = new File($this->__tmp_dir . $mispFileName, true, 0644);
        $header = $this->__JsonExporter->header($options);
        $this->__curr_input_file->append($header);
        $this->__curr_input_is_empty = true;
    }

    /**
     * Buffers one attribute/event as JSON and converts the batch when it is full.
     *
     * @param mixed $data attribute or event to export
     * @param array $options export options; 'scope' is 'Attribute' or 'Event'
     * @return string always '' (output is accumulated in temporary files)
     */
    public function handler($data, $options = array())
    {
        if (!empty($data)) {
            // Default to 0 so an unexpected scope cannot leave the counter undefined.
            $attr_count = 0;
            $scope = isset($options['scope']) ? $options['scope'] : null;
            if ($scope === 'Attribute') {
                $attr_count = 1;
            } elseif ($scope === 'Event') {
                $attr_count = self::__count_atributes($data);
            }
            if (!$this->__curr_input_is_empty) {
                $this->separator(); // calling separator since returning '' will prevent it
            }
            $jsonData = $this->__JsonExporter->handler($data, $options);
            $this->__curr_input_file->append($jsonData);
            $this->__curr_input_is_empty = false;
            $this->__n_attributes += $attr_count;
        }
        // Once the batch is large enough: convert it, delete it and start a new one.
        if ($this->__n_attributes >= $this->__MAX_n_attributes) {
            $this->__process_file($options);
            $this->__initialize_misp_file($options);
        }
        return '';
    }

    /**
     * Converts the last pending batch and returns the complete export.
     *
     * @param array $options export options forwarded to the JSON exporter
     * @return string Yara rules (raw mode) or a JSON document with "generated" and "as-is" arrays
     */
    public function footer($options = array())
    {
        if (!$this->__curr_input_is_empty) {
            $this->__process_file($options);
        }
        $data_gen = $this->__read_and_delete($this->__yara_file_gen->path);
        $data_asis = $this->__read_and_delete($this->__yara_file_asis->path);
        if ($this->__raw_mode) {
            return
                '// ===================================== GENERATED ====================================' . PHP_EOL .
                $data_gen . PHP_EOL .
                '// ===================================== AS-IS ====================================' . PHP_EOL .
                $data_asis;
        }
        return '{"generated":[' . $data_gen . '],' .
            '"as-is":[' . $data_asis . ']}';
    }

    /**
     * Reads a temporary file completely, then removes it.
     *
     * @param string $path file to consume
     * @return string file content, '' when it could not be read
     */
    private function __read_and_delete($path)
    {
        $file = new File($path);
        $content = $file->read(true, 'r');
        $file->close();
        $file->delete();
        return $content === false ? '' : $content;
    }

    /**
     * Writes the JSON element separator to the input file when needed.
     *
     * @return string always ''
     */
    public function separator()
    {
        if (!$this->__curr_input_is_empty) {
            $this->__curr_input_file->append(',');
        }
        return '';
    }

    /**
     * Closes the current batch, runs the Python converter on it and discards it.
     *
     * @param array $options export options forwarded to JsonExport::footer()
     */
    private function __process_file($options)
    {
        $footer = $this->__JsonExporter->footer($options);
        $this->__curr_input_file->append($footer);
        // Every argument is escaped so that paths containing spaces or shell
        // metacharacters cannot break (or alter) the command line.
        $command = 'python3 ' . escapeshellarg($this->__script_path)
            . ' --input ' . escapeshellarg($this->__curr_input_file->path)
            . ' --out-generated ' . escapeshellarg($this->__yara_file_gen->path)
            . ' --out-asis ' . escapeshellarg($this->__yara_file_asis->path)
            . ($this->__raw_mode ? ' --raw' : '')
            . ' 2>>' . escapeshellarg($this->__log_path);
        shell_exec($command);
        $this->__curr_input_file->close();
        $this->__curr_input_file->delete();
        $this->__n_attributes = 0;
    }

    /**
     * @return string random 12-character name for a temporary file
     */
    public function generateRandomFileName()
    {
        return (new RandomTool())->random_str(false, 12);
    }
}

View File

@ -388,6 +388,8 @@ class Attribute extends AppModel
'suricata' => array('txt', 'NidsSuricataExport', 'rules'),
'snort' => array('txt', 'NidsSnortExport', 'rules'),
'text' => array('txt', 'TextExport', 'txt'),
'yara' => array('txt', 'YaraExport', 'yara'),
'yara-json' => array('json', 'YaraExport', 'json'),
'rpz' => array('rpz', 'RPZExport', 'rpz'),
'csv' => array('csv', 'CsvExport', 'csv'),
'cache' => array('txt', 'CacheExport', 'cache')

View File

@ -147,6 +147,22 @@ class Event extends AppModel
'params' => array('returnFormat' => 'text', 'includeAttachments' => 1),
'description' => 'Click on one of the buttons below to download all the attributes with the matching type. This list can be used to feed forensic software when searching for susipicious files. Only published events and attributes marked as IDS Signature are exported.'
),
'yara' => array(
'extension' => '.yara',
'type' => 'Yara',
'scope' => 'Event',
'requiresPublished' => 1,
'params' => array('returnFormat' => 'yara'),
'description' => 'Click this to download Yara rules generated from all relevant attributes.'
),
'yara-json' => array(
'extension' => '.json',
'type' => 'Yara',
'scope' => 'Event',
'requiresPublished' => 1,
'params' => array('returnFormat' => 'yara-json'),
'description' => 'Click this to download Yara rules generated from all relevant attributes. Rules are returned in a JSON format with information about origin (generated or parsed) and validity.'
),
);
public $validFormats = array(
@ -160,6 +176,8 @@ class Event extends AppModel
'csv' => array('csv', 'CsvExport', 'csv'),
'stix' => array('xml', 'Stix1Export', 'xml'),
'stix2' => array('json', 'Stix2Export', 'json'),
'yara' => array('txt', 'YaraExport', 'yara'),
'yara-json' => array('json', 'YaraExport', 'json'),
'cache' => array('txt', 'CacheExport', 'cache')
);

View File

@ -0,0 +1,549 @@
from yaratemplate import YaraLexerException, YaraRuleTemplate, YaraFileTemplate, YaraTemplateException, YaraTemplateRuleConflictException
import uuid
# =========================== CORE EXPORTERS ===================================
def mispevent2yara(event, options=None):
    """Convert one MISP event (dict) into Yara rules.

    :param event: event dict; reads 'Attribute', 'uuid' and 'info'
    :param options: optional overrides for the defaults below
    :return: tuple ``(generated, asis_valid, asis_broken)`` as produced by
             ``mispattrs2yara``; three empty lists when the event has no
             attributes, so callers can always unpack three values
    """
    opts = {
        'chaining_op': 'or',
        'display_attr_uuids': False,
        'max_attrs_per_rule': 1000,
        'event_uuid_only': True
    }
    opts.update(options or {})
    if not event.get('Attribute'):
        return [], [], []
    generated, asis_valid, asis_broken = mispattrs2yara(event['Attribute'], opts)
    for rule_index, r in enumerate(generated + asis_valid):
        # Only generated rules that merge several attributes get an event-based name.
        if not r.loaded_from_source and r.attr_count() > 1:
            rulename = 'MISP_EVENT_{}_PART{}'.format(event['uuid'].replace('-', '_'), rule_index+1)
            r.set_name(rulename)
        r.add_meta('MISP_EVENT_UUID', event['uuid'])
        r.add_meta('MISP_EVENT_INFO', event['info'])
    return generated, asis_valid, asis_broken
def mispobject2yara(obj):
    """Placeholder for MISP object export: does nothing and returns None."""
    return None
def mispattrs2yara(attrs_array, options=None):
    """Convert a list of MISP attributes into Yara rules.

    Attributes of type 'yara' are parsed as-is; every other attribute is
    merged into generated rules holding at most ``max_attrs_per_rule``
    attributes each.

    :param attrs_array: list of attribute dicts (reads 'type', 'uuid', 'value')
    :param options: optional overrides ('chaining_op', 'max_attrs_per_rule', ...)
    :return: tuple ``(generated_rules, asis_valid_rules, asis_broken_rules)``;
             the last list holds commented source strings of yara attributes
             that could not be parsed. Always three lists, even for empty
             input, so callers can unpack the result unconditionally.
    """
    if not attrs_array:
        return [], [], []
    opts = {
        'chaining_op': 'or',
        'max_attrs_per_rule': 1
    }
    opts.update(options or {})
    generated_rules = []
    asis_valid_rules = []
    asis_broken_rules = []
    current_rule = MISPRuleTemplate()
    last_index = len(attrs_array) - 1
    for i, attr in enumerate(attrs_array):
        if attr['type'] == 'yara':
            try:
                asis_valid_rules += MISPRuleTemplate.from_yara_attr(attr)
            except YaraTemplateException as e:
                # Keep the unparsable source, prefixed with a comment explaining why.
                comment = '/* MISP EXPORT COMMENT\n'
                comment += ' MISP_UUID: {}\n'.format(attr['uuid'])
                comment += ' {}\n'.format(str(e))
                comment += '*/\n'
                asis_broken_rules.append('{}{}'.format(comment, attr['value']))
        else:
            current_rule.add_attribute(attr, opts)
        last_attr_reached = i == last_index
        max_size_reached = current_rule.attr_count() >= opts['max_attrs_per_rule']
        if last_attr_reached or max_size_reached:
            # if rule has "strings" section, generate the corresponding "condition"
            if current_rule.strings:
                if opts['chaining_op'] == 'or':
                    current_rule.or_condition('any of them')
                elif opts['chaining_op'] == 'and':
                    current_rule.and_condition('all of them')
            # keep the rule only if it ended up with a condition, else discard it
            if current_rule.condition:
                generated_rules.append(current_rule)
            current_rule = MISPRuleTemplate()
    return generated_rules, asis_valid_rules, asis_broken_rules
# =========================== ATTR HANDLERS CORE ===============================
class MISPRuleTemplate(YaraRuleTemplate):
    """Yara rule template that can be filled from MISP attributes.

    Either loaded from the source of a 'yara' attribute (``from_yara_attr``)
    or built incrementally from other attribute types (``add_attribute``),
    each supported type being translated by a dedicated ``_<type>`` handler.
    """

    def __init__(self, rulename=None):
        super().__init__(rulename)
        # Number of MISP attributes merged into this rule so far.
        self._attributes_count = 0

    @classmethod
    def from_yara_attr(cls, mispattr):
        """Parse the value of a 'yara' attribute.

        :return: list of rules loaded from ``mispattr['value']``, each
                 enriched with MISP meta/tags
        """
        rules = cls.from_source(mispattr['value'])
        for rule in rules:
            rule._enrich(mispattr)
        return rules

    def add_attribute(self, mispattr, options):
        """Merge one non-yara MISP attribute into this rule.

        The attribute is counted even when its type has no handler.

        :param mispattr: attribute dict (reads 'type', 'value', 'uuid')
        :param options: dict; 'event_uuid_only' suppresses the attribute-uuid meta
        :return: self
        """
        opts = {
            'chaining_op': 'or',
        }
        opts.update(options)
        self._handle(mispattr, opts)
        self._attributes_count += 1
        event_only = bool(opts.get('event_uuid_only'))
        self._enrich(mispattr, event_uuid_only=event_only)
        self._generate_name(mispattr)
        return self

    def _enrich(self, attr, event=None, event_uuid_only=False):
        """Add MISP provenance meta and tags to the rule; returns self."""
        if not event and 'Event' in attr:
            event = attr['Event']
        # META:
        # attribute uuids
        if not event_uuid_only:
            uuid_meta = '{} ({})'.format(attr['uuid'], attr['type'])
            self.add_meta('MISP_UUID', uuid_meta)
        # event uuids
        if event:
            self.add_meta('MISP_EVENT_UUID', event['uuid'])
            self.add_meta('MISP_EVENT_INFO', event['info'])
        # other META and TAGS:
        if self.loaded_from_source:
            self.add_tag('as_is')
            if self.autofixed:
                self.add_tag('repaired')
                # The sentences are space-separated (the original fused
                # "repaired." and "Some" together).
                origin_msg = 'Loaded from a corrupted Yara attribute, '\
                             + 'automatically repaired. '\
                             + 'Some comments may have been removed by parser. '\
                             + 'Rule may be unreliable.'
                self.add_meta('MISP_ORIGIN', origin_msg)
                self.add_meta('MISP_FIX_NOTES', self.autofixed_comment)
            else:
                self.add_tag('valid')
                validity_msg = 'Loaded as-is from a Yara attribute. ' \
                               + 'Some comments may have been removed by parser.'
                self.add_meta('MISP_ORIGIN', validity_msg)
        else:
            self.add_tag('generated')
            self.add_meta('MISP_ORIGIN', 'Automatically generated '
                          + 'from non-Yara attribute(s)')
        return self

    def _generate_name(self, attr):
        """Name a generated rule after its attribute(s); loaded rules keep their name."""
        if self.loaded_from_source:
            return self
        if self._attributes_count == 1:
            # Yara identifiers only allow alphanumerics and '_', so the dashes
            # of the uuid are replaced (same convention as mispevent2yara).
            name = 'MISP_ATTRIBUTE_{}'.format(attr['uuid'].replace('-', '_'))
        else:
            rand_id = str(uuid.uuid4()).replace('-', '')
            name = 'MISP_MULTI_ATTRIBUTES_{}'.format(rand_id)
        self.set_name(name)
        return self

    def attr_count(self):
        """Return the number of attributes added through add_attribute()."""
        return self._attributes_count

    def _handle(self, attr, opts):
        """Dispatch the attribute to its type handler; unknown types are ignored."""
        handler = self._get_type_handler(attr['type'])
        if handler:
            handler(attr, opts)
        return self

    # =========================== ATTR HANDLERS ====================================
    def _get_type_handler(self, attr_type):
        """Return the bound handler for ``attr_type`` or None when unsupported.

        Deliberately absent types:
          * 'yara': specific case, handled by from_yara_attr()
          * unsupported by yara: filename, authentihash, ssdeep, pehash,
            impfuzzy, sha224, sha384, sha512, sha512/224, sha512/256, tlsh,
            cdhash and the matching 'filename|<hash>' composites
          * x509-fingerprint-sha1/md5/sha256: TODO check if doable
          * size-in-bytes: too many false positives (could be OK in objects)
        """
        handlers = {
            'md5': self._md5,
            'sha1': self._sha1,
            'sha256': self._sha256,
            'filename|md5': self._filename_md5,
            'filename|sha1': self._filename_sha1,
            'filename|sha256': self._filename_sha256,
            'ip-src': self._ip_src,
            'ip-dst': self._ip_dst,
            'hostname': self._hostname,
            'domain': self._domain,
            'domain|ip': self._domain_ip,
            'email-src': self._email_src,
            'email-dst': self._email_dst,
            'email-subject': self._email_subject,
            'email-body': self._email_body,
            'url': self._url,
            'regkey': self._regkey,
            'regkey|value': self._regkey_value,
            'pattern-in-file': self._pattern_in_file,
            'pattern-in-traffic': self._pattern_in_traffic,
            'pattern-in-memory': self._pattern_in_memory,
            'cookie': self._cookie,
            'vulnerability': self._vulnerability,
            'text': self._text,
            'hex': self._hex,
            'named pipe': self._named_pipe,
            'mutex': self._mutex,
            'btc': self._btc,
            'xmr': self._xmr,
            'uri': self._uri,
            'imphash': self._imphash,
            'filename|imphash': self._filename_imphash,
            'windows-scheduled-task': self._windows_scheduled_task,
            'windows-service-name': self._windows_service_name,
            'windows-service-displayname': self._windows_service_displayname,
            'ip-dst|port': self._ip_dst_port,
            'ip-src|port': self._ip_src_port,
            'hostname|port': self._hostname_port,
            'email-dst-display-name': self._email_dst_display_name,
            'email-src-display-name': self._email_src_display_name,
            'email-header': self._email_header,
            'email-reply-to': self._email_reply_to,
            'email-x-mailer': self._email_x_mailer,
            'email-mime-boundary': self._email_mime_boundary,
            'email-thread-index': self._email_thread_index,
            'email-message-id': self._email_message_id,
            'github-username': self._github_username,
            'github-repository': self._github_repository,
            'github-organisation': self._github_organisation,
            'mobile-application-id': self._mobile_application_id,
            'user-agent': self._user_agent,
        }
        return handlers.get(attr_type)

    # ---- shared building blocks ---------------------------------------------
    def __generic_string(self, value, opts):
        """Add ``value`` as an anonymous ascii+wide text string."""
        if not value:
            # An empty pattern (e.g. a composite value without '|') would only
            # produce an unusable string definition: skip it.
            return self
        self.strings_text(None, value,
                          escape_newlines=True,
                          nocase=False,
                          ascii=True,
                          wide=True,
                          xor=False,
                          fullword=False)
        return self

    def _hash_condition(self, module, expression, filehash):
        """OR ``<expression> == "<hash>"`` into the condition and import ``module``."""
        # The yara hash/pe functions return lower-case hex digests, so the
        # reference value is normalised to lower case as well.
        self.add_module_dependency(module)
        self.or_condition('{} == "{}"'.format(expression, filehash.strip().lower()))
        return self

    def _string_from_value(self, attr, opts):
        """Handler: use the whole attribute value as a text string."""
        return self.__generic_string(attr['value'], opts)

    def _strings_from_both_parts(self, attr, opts):
        """Handler for 'a|b' composites: add both parts as text strings."""
        left, _, right = attr['value'].rpartition('|')
        self.__generic_string(left, opts)
        self.__generic_string(right, opts)
        return self

    def _string_from_left_part(self, attr, opts):
        """Handler for '<host or ip>|port': only the left part is used.

        Ports are ignored: likely false positives, and the ip:port format
        cannot be guessed.
        """
        left, _, _port = attr['value'].rpartition('|')
        return self.__generic_string(left, opts)

    # ---- hash based handlers (for 'filename|<hash>' only the hash is used) ---
    def _md5(self, attr, opts):
        return self._hash_condition('hash', 'hash.md5(0, filesize)', attr['value'])

    def _sha1(self, attr, opts):
        return self._hash_condition('hash', 'hash.sha1(0, filesize)', attr['value'])

    def _sha256(self, attr, opts):
        return self._hash_condition('hash', 'hash.sha256(0, filesize)', attr['value'])

    def _filename_md5(self, attr, opts):
        _filename, _, filehash = attr['value'].rpartition('|')
        return self._hash_condition('hash', 'hash.md5(0, filesize)', filehash)

    def _filename_sha1(self, attr, opts):
        _filename, _, filehash = attr['value'].rpartition('|')
        return self._hash_condition('hash', 'hash.sha1(0, filesize)', filehash)

    def _filename_sha256(self, attr, opts):
        _filename, _, filehash = attr['value'].rpartition('|')
        return self._hash_condition('hash', 'hash.sha256(0, filesize)', filehash)

    def _imphash(self, attr, opts):
        return self._hash_condition('pe', 'pe.imphash()', attr['value'])

    def _filename_imphash(self, attr, opts):
        _filename, _, filehash = attr['value'].rpartition('|')
        return self._hash_condition('pe', 'pe.imphash()', filehash)

    # ---- hex ----------------------------------------------------------------
    def _hex(self, attr, opts):
        self.strings_hex(None, attr['value'])
        return self

    # ---- composite values -----------------------------------------------------
    _domain_ip = _strings_from_both_parts
    _regkey_value = _strings_from_both_parts
    _ip_dst_port = _string_from_left_part
    _ip_src_port = _string_from_left_part
    _hostname_port = _string_from_left_part

    # ---- plain text values ----------------------------------------------------
    # All of these add the attribute value as one text string.
    _ip_src = _string_from_value
    _ip_dst = _string_from_value
    _hostname = _string_from_value
    _domain = _string_from_value
    _email_src = _string_from_value
    _email_dst = _string_from_value
    _email_subject = _string_from_value
    _email_body = _string_from_value
    _url = _string_from_value
    _regkey = _string_from_value
    _pattern_in_file = _string_from_value
    _pattern_in_traffic = _string_from_value
    _pattern_in_memory = _string_from_value
    _yara = _string_from_value  # not registered in the handler table
    _cookie = _string_from_value
    _vulnerability = _string_from_value
    _text = _string_from_value
    _named_pipe = _string_from_value
    _mutex = _string_from_value
    _btc = _string_from_value
    _xmr = _string_from_value
    _uri = _string_from_value
    _windows_scheduled_task = _string_from_value
    _windows_service_name = _string_from_value
    _windows_service_displayname = _string_from_value
    _email_dst_display_name = _string_from_value
    _email_src_display_name = _string_from_value
    _email_header = _string_from_value
    _email_reply_to = _string_from_value
    _email_x_mailer = _string_from_value
    _email_mime_boundary = _string_from_value
    _email_thread_index = _string_from_value
    _email_message_id = _string_from_value
    _github_username = _string_from_value
    _github_repository = _string_from_value
    _github_organisation = _string_from_value
    _mobile_application_id = _string_from_value
    _user_agent = _string_from_value

View File

@ -0,0 +1,157 @@
import plyara
from plyara.exceptions import ParseError
from plyara import *
from unittest import mock
import re
import string
PERMISSIVE_MODE = True # set to False to use the regular, strict, plyara parser for debugging
_original_match = re.match
def _multiline_match(pattern, string, flags=0):
return _original_match(pattern, string, flags=flags | re.DOTALL | re.MULTILINE)
class _MultilinePlyara(plyara.Plyara):
    """plyara parser whose re.match calls are made with DOTALL | MULTILINE."""

    def parse_string(self, input_string):
        """Parse ``input_string`` with re.match temporarily replaced.

        NOTE(review): the patch replaces ``re.match`` for the whole process
        while parsing runs, so it is not safe to parse concurrently with
        other code relying on ``re.match``.
        """
        with mock.patch.object(re, 'match', _multiline_match):
            # Plain super(): resolve parse_string through the normal MRO
            # instead of skipping plyara.Plyara itself.
            return super().parse_string(input_string)
class PermissivePlyara:
    """Yara parser front-end that tries to repair common source errors.

    In permissive mode the source is parsed with ``_MultilinePlyara``; on a
    ParseError a fix is chosen from the error message, applied, and parsing
    is retried until it succeeds or a fix no longer changes the source.
    """

    def parse_string(self, input_string):
        """Parse Yara source and return plyara's list of rule dicts.

        :raises ParseError: on any failure; unexpected exceptions raised
            while parsing are converted to ParseError so that callers only
            have one exception type to handle
        """
        try:
            if PERMISSIVE_MODE:
                return self._permissive_parse_string(input_string)
            return plyara.Plyara().parse_string(input_string)
        except ParseError:
            raise
        # some errors are not properly caught by plyara
        # convert everything to ParseError to avoid uncatchable crashes
        except Exception as e:
            raise ParseError('Uncaught plyara exception ({}): {}'.format(type(e).__name__, str(e)), None, None)

    def _permissive_parse_string(self, input_string, fix_notes=None, original_error=None):
        """Parse, and on failure apply one fix and recurse.

        :param fix_notes: set of human-readable notes on fixes applied so far
        :param original_error: first ParseError met; re-raised when no fix helps
        :return: parsed rules; when fixes were needed each rule dict carries
                 'permissive_plyara_fixed' and 'permissive_plyara_comment'
        """
        if not fix_notes:
            fix_notes = set()
        try:
            # A fresh parser is instantiated for every attempt to avoid
            # failures caused by leftover internal state.
            res = _MultilinePlyara().parse_string(input_string)
            if fix_notes:
                # sorted() keeps the comment stable from one run to the next
                comment = '. '.join(sorted(fix_notes))
                for r in res:
                    r['permissive_plyara_fixed'] = True
                    r['permissive_plyara_comment'] = comment
            return res
        except ParseError as e:
            if not original_error:
                original_error = e
            str_error = str(e)
            fixed = input_string
            if str_error.startswith('Illegal character') and any(elem in str_error for elem in '\u201d\u201c\u2033'):
                fixed = _fix_quotes(input_string)
                fix_notes.add('Wrong quotes characters')
            elif str_error.startswith('Unknown text Rule'):
                fixed = _fix_capital(input_string)
                fix_notes.add('Rule => rule')
            elif str_error.startswith('Illegal character'):
                fixed = _fix_illegal_chars(input_string)
                fix_notes.add('Illegal characters')
            elif str_error.startswith('Unknown text { for token of type LBRACE') \
                    and input_string.lstrip().startswith('{') \
                    and input_string.rstrip().endswith('}'):
                # The source is a bare "{ ... }" body: the rule declaration is missing.
                fixed = _fix_noname(input_string)
                fix_notes.add('Missing rule name')
            elif re.match(r'Unknown text\s?_\s?for token of type ID', str_error):
                fixed = _fix_spaced_underscores(input_string)
                fix_notes.add("' _ ' => '_'")
            else:
                fixed = _fix_magic(input_string)
                fix_notes.add('Magic fix (highly unreliable)')
            if fixed != input_string:
                return self._permissive_parse_string(fixed, fix_notes, original_error)
            # The fix changed nothing: give up and report the first error.
            raise original_error
def _fix_quotes(yara_src):
repaired = yara_src
repaired = repaired.replace('', '"')
repaired = repaired.replace('', '"')
repaired = repaired.replace('', '"')
return repaired
def _fix_capital(yara_src):
repaired = yara_src.replace('Rule', 'rule')
return repaired
def _fix_illegal_chars(yara_src):
repaired = ''.join(filter(lambda x: x in string.printable, yara_src))
return repaired
def _fix_noname(yara_src):
repaired = 'rule UnnamedRule ' + yara_src
return repaired
def _fix_spaced_underscores(yara_src):
repaired = yara_src.replace(' _ ', '_')
return repaired
def _fix_magic(yara_src):
repaired = ''
for line in yara_src.splitlines():
if '//' not in line:
repaired += line
else:
repaired += '\n{}\n'.format(line)
return repaired if repaired else yara_src
# Keeping this code for later as it contains more advanced fixes
# def _try_simple_repairs(yara_src, error):
# reasons = []
# # common quotes error
# repaired = yara_src
# repaired = repaired.replace('”', '"')
# repaired = repaired.replace('“', '"')
# repaired = repaired.replace('″', '"')
# if repaired != yara_src:
# reasons.append('wrong quotes characters')
# # missing rule declaration
# if repaired.strip().startswith('{'):
# reasons.append('missing rule name')
# rulename = 'UnnamedRule'
# repaired = 'rule {} {}'.format(rulename, repaired)
# # capital letter rule declaration
# if repaired.strip().startswith('Rule'):
# reasons.append('Rule => rule')
# repaired = repaired.replace('Rule', 'rule')
# if 'Illegal character' in str(error):
# repaired = ''.join(filter(lambda x: x in string.printable, repaired))
# reasons.append('illegal characters')
# # badly formated declaration:
# # check if rule matches format : DATA rule DECLARATION { CONTENT } DATA
# split_source = re.split(r'rule\s(.*?){(.*)}', repaired, flags=re.MULTILINE|re.DOTALL)
# if len(split_source) == 4:
# split_source = {'pre': split_source[0],
# 'declaration': split_source[1].replace(' ', '').rstrip().rstrip(':'),
# 'content': split_source[2],
# 'post': split_source[3]}
# quoted_content = re.split(r'\"(.+?)\"', split_source['content'], re.MULTILINE | re.DOTALL)
# nolinebreak_content = ''
# for chunk in quoted_content: # remove line breaks in strings and meta
# if chunk.startswith('"') and chunk.endswith('"'):
# nolinebreak_content += ''.join(chunk.splitlines())
# reassembled = 'rule {} {{ {} }}'.format(split_source['declaration'], split_source['content'])
# repaired = reassembled
# return repaired, reasons

View File

@ -0,0 +1,94 @@
from misp2yara import mispevent2yara, mispattrs2yara, MISPRuleTemplate
import sys
import json
import os
from optparse import OptionParser
def rules2json_export(rules, extra_comment=''):
    """Serialise ``rules`` as a JSON array of per-rule export dicts.

    ``extra_comment`` is accepted but not used.
    """
    exported = []
    for rule in rules:
        exported.append(rule2json_export(rule))
    return json.dumps(exported)
def rule2json_export(rule, extra_comment=''):
    """Build the export dict ('value', 'comment', 'valid') for one rule.

    MISPRuleTemplate instances are reported as valid, with a comment on
    their origin; anything else (the commented source of a broken yara
    attribute) is reported as invalid. ``extra_comment`` is not used.
    """
    rule_text = str(rule)
    if not isinstance(rule, MISPRuleTemplate):
        return {
            'value': rule_text,
            'comment': 'Broken yara attribute. Could not parse or repair.',
            'valid': False
        }
    comment = 'Loaded from source. ' if rule.loaded_from_source else 'Generated. '
    if rule.autofixed:
        comment = comment + 'May be unreliable due to automatic repairs: '
        comment = comment + rule.autofixed_comment
    return {
        'value': rule_text,
        'comment': comment,
        'valid': True
    }
def file_is_empty(path):
    """Return True when the file at ``path`` has a size of zero bytes."""
    size_in_bytes = os.path.getsize(path)
    return size_in_bytes == 0
def output_json(output_path, output_rules):
    """Append ``output_rules`` to ``output_path`` as comma-separated JSON objects.

    The file holds the *content* of a JSON array (no brackets) so several
    batches can be appended one after another; a ',' is written first when
    the file already has content. An empty batch writes nothing: otherwise
    a dangling ',' would make the final document invalid.
    """
    with open(output_path, 'a+', encoding='utf-8') as f:
        if not output_rules:
            return
        if not file_is_empty(output_path):
            f.write(',')
        # strip the enclosing '[' and ']' of the serialised array
        f.write(rules2json_export(output_rules)[1:-1])
def output_raw(output_path, output_rules):
    """Append ``output_rules`` to ``output_path`` as plain Yara source.

    Rules are separated by a blank line, including across successive calls
    (batches): without the leading separator the first rule of a batch
    would be glued to the last rule of the previous one. An empty batch
    writes nothing.
    """
    with open(output_path, 'a+', encoding='utf-8') as f:
        if not output_rules:
            return
        if os.path.getsize(output_path) > 0:
            f.write('\n\n')
        f.write('\n\n'.join(str(r) for r in output_rules))
if __name__ == "__main__":
    # Command line entry point: reads a MISP JSON export (its 'response' key
    # holding either {'Attribute': [...]} or a list of {'Event': {...}}),
    # converts it to Yara rules and appends them to the two output files.
    parser = OptionParser()
    parser.add_option("-i", "--input", dest="in_file",
                      help="input file", metavar="FILE")
    parser.add_option("-g", "--out-generated", dest="out_gen",
                      help="output for generated rules", metavar="FILE")
    parser.add_option("-a", "--out-asis", dest="out_asis",
                      help="output for as-is rules", metavar="FILE")
    parser.add_option("-r", "--raw",
                      action="store_true", dest="raw_output", default=False,
                      help="outputs raw yara rules instead of json-structured rules")
    (options, args) = parser.parse_args()
    in_path = options.in_file
    out_path_gen = options.out_gen
    out_path_asis = options.out_asis
    raw_mode = options.raw_output

    with open(in_path, 'r', encoding='utf-8') as in_file:
        content = in_file.read()
    # An empty input file means there is nothing to convert.
    loaded = json.loads(content)['response'] if content else None

    generated = []
    asis_valid = []
    asis_broken = []
    if loaded is None:
        pass
    elif isinstance(loaded, dict) and 'Attribute' in loaded:
        # Empty lists are skipped so the three-way unpacking cannot fail.
        if loaded['Attribute']:
            generated, asis_valid, asis_broken = mispattrs2yara(loaded['Attribute'])
    elif isinstance(loaded, list):
        for event_dict in loaded:
            if 'Event' not in event_dict:
                raise Exception('Json doesn\'t seem to be an list of attributes or events')
            event = event_dict['Event']
            if not event.get('Attribute'):
                continue
            curr_generated, curr_asis_valid, curr_asis_broken = mispevent2yara(event)
            generated += curr_generated
            asis_valid += curr_asis_valid
            asis_broken += curr_asis_broken
    else:
        raise Exception('Json doesn\'t seem to be an list of attributes or events')

    if raw_mode:
        output_raw(out_path_gen, generated)
        output_raw(out_path_asis, asis_valid + asis_broken)
    else:
        output_json(out_path_gen, generated)
        output_json(out_path_asis, asis_valid + asis_broken)

View File

@ -0,0 +1,352 @@
from permissive_plyara import PermissivePlyara
from permissive_plyara import ParseError
import plyara
from plyara import utils
import warnings
import re
# Version string of this module.
__version__ = '0.1'
# NOTE(review): presumably the Yara release this template targets - confirm.
__yara_version__ = '8.1'
class YaraTemplateException(Exception):
    """Base error of the Yara template module.

    ``source`` optionally carries the Yara source text the error relates to.
    """

    def __init__(self, message, source=None):
        super().__init__(message)
        self.source = source


class YaraLexerException(YaraTemplateException):
    """Raised by ``YaraRuleTemplate.from_source`` when the source cannot be parsed."""


class YaraTemplateRuleConflictException(YaraTemplateException):
    """Template error reserved for rule conflicts."""


class YaraTemplateRuleDependencyException(YaraTemplateException):
    """Template error reserved for rule dependency problems."""
class YaraRuleTemplate:
    """In-memory representation of one YARA rule that can be built up
    programmatically (or loaded from source) and rendered with ``str()``.

    Most mutators return ``self`` so calls can be chained.
    """

    class _YaraStringsItem:
        """One entry of the ``strings`` section, stored already serialized.

        :param stringstype: ``'byte'``, ``'text'`` or ``'regex'``.
        :param name: identifier; a leading ``$`` is added when missing.
        :param value: raw value WITHOUT its delimiters (braces, quotes or
            slashes); the delimiters are added here.
        :param modifiers: list of modifier keywords (e.g. ``'nocase'``).
        :param force_escape: when true, escape the value so it cannot break
            out of its delimiters.
        """

        def __init__(self, stringstype, name, value,
                     modifiers, force_escape=True):
            if not name.startswith('$'):
                name = '${}'.format(name)
            if stringstype == 'byte':
                value = '{{ {} }}'.format(value)
            elif stringstype == 'text':
                if force_escape:
                    value = yara_escape_str(value)
                value = '"{}"'.format(value)
            elif stringstype == 'regex':
                if force_escape:
                    # Escape every '/' that is not already preceded by a
                    # backslash. A negative lookbehind is used so that a '/'
                    # at the very start of the value is escaped as well.
                    value = re.sub(r'(?<!\\)/', r'\\/', value)
                value = '/{}/'.format(value)
            self.stringstype = stringstype
            self.name = name
            self.value = value
            # Copy so that later changes to the caller's list do not leak in.
            self.modifiers = list(modifiers)

        def __str__(self):
            """Render as ``<name> = <value> <modifiers>``."""
            return "{} = {} {}".format(self.name, self.value,
                                       ' '.join(self.modifiers))

    def __init__(self, rulename):
        """Create an empty rule called ``rulename`` (stored as given)."""
        self.rulename = rulename
        self.ruletags = set()
        self.rulescopes = set()  # can be empty, 'global' or 'private'
        self.meta = set()  # set of (key, escaped value) tuples
        self.strings = []  # list instead of name=>value dict because of anonymous strings
        self.condition = ''
        self.file_dependencies = []
        self.rule_dependencies = []
        self.module_dependencies = []
        self.loaded_from_source = False
        self.autofixed = False
        self.autofixed_comment = ''

    @classmethod
    def from_source(cls, yara_source):
        """Parse YARA source text into a list of templates, one per rule.

        :param yara_source: rule text. ``bytes`` are decoded as UTF-8
            (undecodable bytes are replaced); any other non-``str`` object
            is converted with ``str()``.
        :returns: list of ``YaraRuleTemplate``.
        :raises YaraLexerException: when the parser raises ``ParseError``.
        :raises YaraTemplateException: re-raised from rule conversion with
            ``source`` set to the text that was parsed.
        """
        if isinstance(yara_source, (bytes, bytearray)):
            # str(b'...') would yield the literal "b'...'" and corrupt the rule
            yara_source = bytes(yara_source).decode('utf-8', errors='replace')
        elif not isinstance(yara_source, str):
            yara_source = str(yara_source)
        try:
            plyara_parsed = PermissivePlyara().parse_string(yara_source)
        except ParseError as e:
            raise YaraLexerException(str(e), yara_source)
        try:
            return [cls._from_plyara(plyara_rule)
                    for plyara_rule in plyara_parsed]
        except YaraTemplateException as e:
            e.source = yara_source
            raise

    @classmethod
    def _from_plyara(cls, plyara_out):
        """Create a YaraRuleTemplate from plyara's output for a single rule.

        :param plyara_out: one parsed-rule dict, or a list holding exactly
            one such dict.
        :returns: the populated template, flagged ``loaded_from_source``.
        """
        plyara_out = cls._ensure_one_rule(plyara_out)
        rule = cls(plyara_out['rule_name'])
        # Set first: it makes _strings() keep parsed values unescaped.
        rule.loaded_from_source = True
        if 'tags' in plyara_out:
            rule.ruletags.update(plyara_out['tags'])
        if 'scopes' in plyara_out:
            rule.rulescopes.update(plyara_out['scopes'])
        if 'metadata' in plyara_out:
            for m in plyara_out['metadata']:
                for k, v in m.items():
                    rule.add_meta(k, v)
        if 'strings' in plyara_out:
            for s in plyara_out['strings']:
                s_modifiers = s['modifiers'] if 'modifiers' in s else []
                if s['type'] == 'byte' or s['type'] == 'regex':
                    # drop the first and last character (the delimiters);
                    # _YaraStringsItem adds its own
                    value = s['value'][1:-1]
                else:
                    value = s['value']
                rule._strings(s['type'], s['name'], value, s_modifiers)
        if 'raw_condition' in plyara_out:
            # Parsing conditions is too tricky and error-prone, so keep the
            # raw text that follows the section keyword. Whitespace before
            # the colon is tolerated; if the keyword is absent the raw text
            # is kept whole instead of failing on tuple unpacking.
            parts = re.split(r'condition\s*:', plyara_out['raw_condition'],
                             maxsplit=1)
            rule.condition = parts[-1]
        else:
            return rule  # stop and return to avoid uncaught plyara exceptions
        if 'includes' in plyara_out:
            rule.file_dependencies = plyara_out['includes']
        rule.rule_dependencies = plyara.utils.detect_dependencies(plyara_out)
        rule.module_dependencies = plyara.utils.detect_imports(plyara_out)
        if 'permissive_plyara_fixed' in plyara_out \
                and plyara_out['permissive_plyara_fixed']:
            rule.autofixed = True
        if 'permissive_plyara_comment' in plyara_out:
            rule.autofixed_comment = plyara_out['permissive_plyara_comment']
        return rule

    @staticmethod
    def _unique_in_order(items):
        """Return ``items`` without duplicates, keeping first-seen order."""
        seen = set()
        unique = []
        for item in items:
            if item not in seen:
                seen.add(item)
                unique.append(item)
        return unique

    def __str__(self):
        """Render the rule as YARA source text.

        Includes and imports keep their insertion order (duplicates are
        dropped); scopes, tags and meta entries are sorted, so the same
        template always renders to the same text.
        """
        includes = self._unique_in_order(self.file_dependencies)
        imports = self._unique_in_order(self.module_dependencies)
        includes_str = '\n'.join(['include "{}"'.format(i) for i in includes])
        imports_str = '\n'.join(['import "{}"'.format(i) for i in imports])
        scopes = (' '.join(sorted(self.rulescopes)) + ' ') \
            if self.rulescopes else ''
        tags_str = (' : ' + ' '.join(sorted(self.ruletags))) \
            if self.ruletags else ''
        declaration = '{}rule {}{}'.format(scopes, self.rulename, tags_str)
        meta_section = ''
        strings_section = ''
        condition_section = ''
        if self.meta:
            meta_section += '\tmeta:'
            for (m, v) in sorted(self.meta):
                meta_section += '\n\t\t{} = "{}"'.format(m, v)
            meta_section += '\n'
        if self.strings:
            strings_section += '\tstrings:'
            for s in self.strings:
                strings_section += '\n\t\t{}'.format(s)
            strings_section += '\n'
        if self.condition:
            condition_section += '\tcondition:'
            # re-indent every non-blank line of the condition
            for cond_line in self.condition.splitlines():
                stripped = cond_line.strip()
                if stripped:
                    condition_section += '\n\t\t{}'.format(stripped)
        result = '{}\n{}\n{}\n{{\n{}{}{}\n}}'.format(includes_str,
                                                     imports_str,
                                                     declaration,
                                                     meta_section,
                                                     strings_section,
                                                     condition_section)
        if not self.condition:
            result = '// this rule will not compile (mandatory "condition" section missing)\n{}'.format(result)
        return result

    def add_meta(self, meta_key, meta_value):
        """Add a ``meta`` entry; the value is stringified and escaped with
        the same filter as "strings" entries. Returns ``self``."""
        meta_value = yara_escape_str(str(meta_value))
        self.meta.add((meta_key, meta_value))
        return self

    def set_name(self, name):
        """Set the rule name, replacing forbidden characters with '_'.

        A name that is empty or starts with a digit gets a '_' prefix
        (an empty name therefore becomes '_'). Returns ``self``.
        """
        name = re.sub(r'[^A-Za-z0-9_]', '_', name)
        if not name or name[0].isdigit():
            name = '_{}'.format(name)
        self.rulename = name
        return self

    def add_tag(self, tag):
        """Add a rule tag, replacing forbidden characters with '_'.

        A tag starting with a digit gets a '_' prefix; an empty tag is
        ignored instead of raising IndexError. Returns ``self``.
        """
        tag = re.sub(r'[^A-Za-z0-9_]', '_', tag)
        if not tag:
            return self
        if tag[0].isdigit():
            tag = '_{}'.format(tag)
        self.ruletags.add(tag)
        return self

    def set_condition(self, condition_expression):
        """Replace the whole condition. Returns ``self``."""
        self.condition = condition_expression
        return self

    def and_condition(self, condition_expression):
        """Append ``condition_expression`` with ``and`` (or use it as the
        condition when none is set yet). Returns ``self``."""
        if not self.condition:
            self.condition = '{}'.format(condition_expression)
        else:
            self.condition = '{}\n and {}'.format(self.condition,
                                                  condition_expression)
        return self

    def or_condition(self, condition_expression):
        """Append ``condition_expression`` with ``or`` (or use it as the
        condition when none is set yet). Returns ``self``."""
        if not self.condition:
            self.condition = '{}'.format(condition_expression)
        else:
            self.condition = '{}\n or {}'.format(self.condition,
                                                 condition_expression)
        return self

    def _strings(self, str_type, name, value, modifiers):
        """Add an entry to the 'strings' section.

        :param str_type: 'byte', 'text' or 'regex'.
        :param name: identifier; ``None``, '' or '$' means an anonymous
            string, and several anonymous strings may coexist.
        :param value: value without delimiters.
        :param modifiers: list of modifier keywords.
        :raises YaraTemplateException: when a named string already exists.
        """
        if name == '$' or not name:
            name = '$'
        # values parsed from existing source are kept as written
        force_escape = not self.loaded_from_source
        str_entry = self._YaraStringsItem(str_type, name, value,
                                          modifiers, force_escape)
        if str_entry.name == '$' \
                or str_entry.name not in (o.name for o in self.strings):
            self.strings.append(str_entry)
        else:
            raise YaraTemplateException(
                'There is already a string named "{}"'.format(str_entry.name))
        return self

    def strings_hex(self, name, value):
        """Add a 'byte' entry ({}) to the strings section; no modifiers are
        attached. Returns ``self``."""
        self._strings('byte', name, value, [])
        return self

    def strings_text(self, name, value, escape_newlines=True, nocase=True,
                     ascii=True, wide=True, xor=False, fullword=False):
        """Add a 'text' entry ("") to the strings section
        (default modifiers: nocase ascii wide).

        Multi-line values are handled in one of two ways:

        * ``escape_newlines=True``: the value becomes a single 'regex'
          entry whose line breaks are system-agnostic.
        * ``escape_newlines=False``: each non-empty line becomes its own
          'text' entry. Named strings get an index suffix
          (``name_0``, ``name_1``, ...) because names must be unique;
          anonymous strings stay anonymous.

        Returns ``self``.
        """
        # NOTE(review): YARA documents restrictions on combining modifiers
        # (e.g. xor with nocase, or xor on a regex); the flags are passed
        # through unchecked here - confirm callers pick valid combinations.
        modifiers = []
        if nocase:
            modifiers.append('nocase')
        if ascii:
            modifiers.append('ascii')
        if wide:
            modifiers.append('wide')
        if xor:
            modifiers.append('xor')
        if fullword:
            modifiers.append('fullword')
        lines = value.splitlines()
        if escape_newlines and len(lines) > 1:
            # only regex supports system-agnostic line breaks
            value = _str2yara_regex(value)
            self._strings('regex', name, value, modifiers)
        elif len(lines) > 1:
            anonymous = (not name) or name == '$'
            for index, line in enumerate(lines):
                if not line:
                    continue  # an empty line carries no pattern to match
                line_name = name if anonymous \
                    else '{}_{}'.format(name, index)
                self._strings('text', line_name, line, modifiers)
            # TODO: improvement: group lines with 'all of $*'
            # instead of (\r|\r\n|\n|\x1E)
        else:
            self._strings('text', name, value, modifiers)
        return self

    def strings_regex(self, name, value, nocase=True, ascii=True,
                      wide=True, fullword=False):
        """Add a 'regex' entry (//) to the strings section
        (default modifiers: nocase ascii wide). Returns ``self``."""
        modifiers = []
        if nocase:
            modifiers.append('nocase')
        if ascii:
            modifiers.append('ascii')
        if wide:
            modifiers.append('wide')
        if fullword:
            modifiers.append('fullword')
        self._strings('regex', name, value, modifiers)
        return self

    def add_file_dependency(self, file_name):
        """Add an 'include' statement (ignored if already present)."""
        if file_name not in self.file_dependencies:
            self.file_dependencies.append(file_name)
        return self

    def add_rule_dependency(self, rule_name):
        """Record a rule dependency (ignored if already present); useful to
        determine the order in a group of rules."""
        if rule_name not in self.rule_dependencies:
            self.rule_dependencies.append(rule_name)
        return self

    def add_module_dependency(self, module_name):
        """Add an 'import' dependency (ignored if already present)."""
        if module_name not in self.module_dependencies:
            self.module_dependencies.append(module_name)
        return self

    @staticmethod
    def _ensure_one_rule(plyara_output):
        """Unwrap a one-element list of parsed rules.

        :returns: the single element of a list, or ``plyara_output`` itself
            when it is not a list.
        :raises YaraTemplateException: when a list does not hold exactly
            one rule.
        """
        if isinstance(plyara_output, list):
            if len(plyara_output) != 1:
                # Built from adjacent literals: a backslash continuation
                # inside the string would embed source indentation in it.
                error_msg = ('Single rule expected, '
                             'string contains {} rules'
                             .format(len(plyara_output)))
                raise YaraTemplateException(error_msg)
            return plyara_output[0]
        return plyara_output
# =============== Tools ===================
# replaces special characters in yara 'text' strings
def yara_escape_str(pattern):
    """Escape ``pattern`` for use inside a double-quoted YARA text string.

    Backslash, double quote, newline and tab are replaced by their
    backslash escapes. A carriage return is written as the hexadecimal
    escape ``\\x0d``: the previous ``\\\\r`` output was an escaped backslash
    followed by the letter ``r``, i.e. two literal characters rather than
    a carriage return.

    :param pattern: text to escape (``str``).
    :returns: the escaped text, without surrounding quotes.
    """
    _special_chars_map = {
        ord('\\'): '\\\\',
        ord('"'): '\\"',
        ord('\n'): '\\n',
        ord('\t'): '\\t',
        ord('\r'): '\\x0d',
    }
    return pattern.translate(_special_chars_map)
# helps convert a python string to a yara 'regex' string, escapes special chars
# handles newlines by making them system-agnostic and optional
def _str2yara_regex(pattern):
_special_chars_map = {
ord(b'/'): '\\/',
# covers '\' and all escapes not valid in python but valid in yara:
# \w \W \s \S \d \D \B
ord(b'\\'): '\\\\',
ord(b'^'): '\\^',
ord(b'$'): '\\$',
ord(b'|'): '\\|',
ord(b'('): '\\(',
ord(b')'): '\\)',
ord(b'['): '\\[',
ord(b']'): '\\]',
ord(b'*'): '\\*',
ord(b'+'): '\\+',
ord(b'?'): '\\?',
ord(b'{'): '\\{',
ord(b'}'): '\\}',
ord(b'\t'): '\\t',
ord(b'\f'): '\\f',
ord(b'\a'): '\\a',
# covers \n \r\n \r and other exotic line breaks (\x1E)
ord(b'\n'): '(\\x0D|\\x0A\\x0D|\\x0A|\\x1E)?',
ord(b'\b'): '\\b'
}
pattern = '\n'.join(pattern.splitlines())
return pattern.translate(_special_chars_map)

0
app/tmp/yara/empty Normal file
View File

View File

@ -7,3 +7,4 @@ requests-mock
pip
nose
jsonschema
plyara >= 2.0.2