from permissive_plyara import PermissivePlyara from permissive_plyara import ParseError import plyara from plyara import utils import warnings import re __version__ = '0.1' __yara_version__ = '8.1' class YaraTemplateException(Exception): def __init__(self, message, source=None): super(Exception, self).__init__(message) self.source = source class YaraLexerException(YaraTemplateException): pass class YaraTemplateRuleConflictException(YaraTemplateException): pass class YaraTemplateRuleDependencyException(YaraTemplateException): pass class YaraRuleTemplate: class _YaraStringsItem: def __init__(self, stringstype, name, value, modifiers, force_escape=True): if not name.startswith('$'): name = '${}'.format(name) if stringstype == 'byte': value = '{{ {} }}'.format(value) elif stringstype == 'text': if force_escape: value = yara_escape_str(value) value = '"{}"'.format(value) elif stringstype == 'regex': if force_escape: # escape all unescaped '/' value = re.sub(r'(?<=[^\\])/', r'\\'+r'/', value) # # quick and dirty way to get rid of illegal line carriages in regexes # value = ''.join([l.strip() for l in value.splitlines()]) value = '/{}/'.format(value) self.stringstype = stringstype self.name = name self.value = value self.modifiers = modifiers def __str__(self): name = self.name value = self.value modifiers = ' '.join(self.modifiers) return "{} = {} {}".format(name, value, modifiers) def __init__(self, rulename): self.rulename = rulename self.ruletags = set() self.rulescopes = set() # can be empty, 'global' or 'private' self.meta = set() self.strings = [] # list instead of name=>value dict because of anonymous strings self.condition = '' self.file_dependencies = [] self.rule_dependencies = [] self.module_dependencies = [] self.loaded_from_source = False self.autofixed = False self.autofixed_comment = '' @classmethod def from_source(cls, yara_source): if not isinstance(yara_source, str): yara_source = str(yara_source) try: plyara_parsed = PermissivePlyara().parse_string(yara_source) except ParseError as e: raise YaraLexerException(str(e), yara_source) rules = [] try: for plyara_rule in plyara_parsed: rule = cls._from_plyara(plyara_rule) rules.append(rule) return rules except YaraTemplateException as e: e.source = yara_source raise # Creates a YaraRuleTemplate from plyara's array output format @classmethod def _from_plyara(cls, plyara_out): plyara_out = cls._ensure_one_rule(plyara_out) rule = cls(plyara_out['rule_name']) rule.loaded_from_source = True if 'tags' in plyara_out: rule.ruletags.update(plyara_out['tags']) if 'scopes' in plyara_out: rule.rulescopes.update(plyara_out['scopes']) if 'metadata' in plyara_out: for m in plyara_out['metadata']: for k, v in m.items(): rule.add_meta(k,v) if 'strings' in plyara_out: for s in plyara_out['strings']: s_modifiers = s['modifiers'] if 'modifiers' in s else [] if s['type'] == 'byte' or s['type'] == 'regex': value = s['value'][1:-1] else: value = s['value'] rule._strings(s['type'], s['name'], value, s_modifiers) if 'raw_condition' in plyara_out: _, cond = plyara_out['raw_condition'].split("condition:",1) rule.condition = cond # parsing conditions is too tricky and prone to errors # rule.condition = " ".join(plyara_out['condition_terms']) else: return rule # stop and return to avoid uncaught plyara exceptions if 'includes' in plyara_out: rule.file_dependencies = plyara_out['includes'] rule.rule_dependencies = plyara.utils.detect_dependencies(plyara_out) rule.module_dependencies = plyara.utils.detect_imports(plyara_out) if 'permissive_plyara_fixed' in plyara_out \ and plyara_out['permissive_plyara_fixed']: rule.autofixed = True if 'permissive_plyara_comment' in plyara_out: rule.autofixed_comment = plyara_out['permissive_plyara_comment'] return rule def __str__(self): includes = set(self.file_dependencies) imports = set(self.module_dependencies) includes_str = '\n'.join(['include "{}"'.format(i) for i in includes]) imports_str = '\n'.join(['import "{}"'.format(i) for i in imports]) scopes = (' '.join(self.rulescopes) + ' ') if self.rulescopes else '' tags_str = (' : ' + ' '.join(self.ruletags)) if self.ruletags else '' declaration = '{}rule {}{}'.format(scopes, self.rulename, tags_str) meta_section = '' strings_section = '' condition_section = '' if self.meta: sorted_meta = sorted(self.meta) meta_section += '\tmeta:' for (m, v) in sorted_meta: meta_section += '\n\t\t{} = "{}"'.format(m, v) meta_section += '\n' if self.strings: strings_section += '\tstrings:' for s in self.strings: strings_section += '\n\t\t{}'.format(s) strings_section += '\n' if self.condition: condition_section += '\tcondition:' for cond_line in self.condition.splitlines(): stripped = cond_line.strip() if stripped: condition_section += '\n\t\t{}'.format(stripped) result = '{}\n{}\n{}\n{{\n{}{}{}\n}}'.format(includes_str, imports_str, declaration, meta_section, strings_section, condition_section) if not self.condition: result = '// this rule will not compile (mandatory "condition" section missing)\n{}'.format(result) return result def add_meta(self, meta_key, meta_value): # remove illegal characters (same filter as "strings" entries) meta_value = yara_escape_str(str(meta_value)) self.meta.add((meta_key, meta_value)) return self def set_name(self, name): # replace forbidden characters with '_' name = re.sub(r'[^A-Za-z0-9_]', '_', name) if name[0].isdigit(): name = '_{}'.format(name) self.rulename = name return self def add_tag(self, tag): # replace forbidden characters with '_' tag = re.sub(r'[^A-Za-z0-9_]', '_', tag) if tag[0].isdigit(): tag = '_{}'.format(tag) self.ruletags.add(tag) return self def set_condition(self, condition_expression): self.condition = condition_expression return self def and_condition(self, condition_expression): if not self.condition: self.condition = '{}'.format(condition_expression) else: self.condition = '{}\n and {}'.format(self.condition, condition_expression) return self def or_condition(self, condition_expression): if not self.condition: self.condition = '{}'.format(condition_expression) else: self.condition = '{}\n or {}'.format(self.condition, condition_expression) return self # Adds an entry to the 'strings' section # str_type can be 'byte', 'text' or 'regex' # name could be None for anonymous strings def _strings(self, str_type, name, value, modifiers): if name == '$' or not name: name = '$' force_escape = False if self.loaded_from_source else True str_entry = self._YaraStringsItem(str_type, name, value, modifiers, force_escape) if str_entry.name == '$' or str_entry.name not in (o.name for o in self.strings): self.strings.append(str_entry) else: raise YaraTemplateException( 'There is already a string named "{}"'.format(str_entry.name)) return self # adds a 'byte' entry ({}) to strings section (default: nocase ascii wide) def strings_hex(self, name, value): self._strings('byte', name, value, []) return self # adds a 'text' entry ("") to strings section (default: nocase ascii wide) def strings_text(self, name, value, escape_newlines=True, nocase=True, ascii=True, wide=True, xor=False, fullword=False): modifiers = [] # escaping unescaped double quotes if nocase: modifiers.append('nocase') if ascii: modifiers.append('ascii') if wide: modifiers.append('wide') if xor: modifiers.append('xor') if fullword: modifiers.append('fullword') if escape_newlines and len(value.splitlines()) > 1: # only regex supports system-agnostic line breaks value = _str2yara_regex(value) self._strings('regex', name, value, modifiers) elif len(value.splitlines()) > 1: for line in value.splitlines(): self._strings('text', name, line, modifiers) # TODO: imporvement: group lines with 'all of $*' # instead of (\r|\r\n|\n|\x1E) else: self._strings('text', name, value, modifiers) return self # adds a 'regex' entry (//) to strings section (default: nocase ascii wide) def strings_regex(self, name, value, nocase=True, ascii=True, wide=True, fullword=False): modifiers = [] if nocase: modifiers.append('nocase') if ascii: modifiers.append('ascii') if wide: modifiers.append('wide') if fullword: modifiers.append('fullword') self._strings('regex', name, value, modifiers) return self # adds an 'include' statement def add_file_dependency(self, file_name): if file_name not in self.file_dependencies: self.file_dependencies.append(file_name) return self # adds an rule dependency, useful to determine the order in a group of rules def add_rule_dependency(self, rule_name): if rule_name not in self.rule_dependencies: self.rule_dependencies.append(rule_name) return self # adds an 'import' dependency def add_module_dependency(self, module_name): if module_name not in self.module_dependencies: self.module_dependencies.append(module_name) return self @staticmethod def _ensure_one_rule(plyara_output): if isinstance(plyara_output, list): if len(plyara_output) != 1: error_msg = 'Single rule expected, \ string contains {} rules'.format(len(plyara_output)) raise YaraTemplateException(error_msg) else: return plyara_output[0] else: return plyara_output # =============== Tools =================== # replaces special characters in yara 'text' strings def yara_escape_str(pattern): _special_chars_map = { ord(b'\\'): '\\\\', ord(b'"'): '\\"', ord(b'\n'): '\\n', ord(b'\t'): '\\t', ord(b'\r'): '\\\\r' } return pattern.translate(_special_chars_map) # helps convert a python string to a yara 'regex' string, escapes special chars # handles newlines by making them system-agnostic and optional def _str2yara_regex(pattern): _special_chars_map = { ord(b'/'): '\\/', # covers '\' and all escapes not valid in python but valid in yara: # \w \W \s \S \d \D \B ord(b'\\'): '\\\\', ord(b'^'): '\\^', ord(b'$'): '\\$', ord(b'|'): '\\|', ord(b'('): '\\(', ord(b')'): '\\)', ord(b'['): '\\[', ord(b']'): '\\]', ord(b'*'): '\\*', ord(b'+'): '\\+', ord(b'?'): '\\?', ord(b'{'): '\\{', ord(b'}'): '\\}', ord(b'\t'): '\\t', ord(b'\f'): '\\f', ord(b'\a'): '\\a', # covers \n \r\n \r and other exotic line breaks (\x1E) ord(b'\n'): '(\\x0D|\\x0A\\x0D|\\x0A|\\x1E)?', ord(b'\b'): '\\b' } pattern = '\n'.join(pattern.splitlines()) return pattern.translate(_special_chars_map)