
158 lines
6.3 KiB

import plyara
from plyara.exceptions import ParseError
from plyara import *
from unittest import mock
import re
import string
# Module-level switch intended to select between the permissive (auto-repairing)
# parser and the plain plyara parser.
PERMISSIVE_MODE = True # set to False to use the regular, strict, plyara parser for debugging
# Reference to the genuine ``re.match``, captured at import time so that the
# wrapper below still reaches the real implementation while ``re.match`` itself
# is temporarily replaced.
_original_match = re.match


def _multiline_match(pattern, string, flags=0):
    """Call the saved ``re.match`` with ``re.DOTALL`` and ``re.MULTILINE`` forced on.

    Same signature as ``re.match`` so it can stand in for it. Caller-supplied
    ``flags`` are kept and the two extra flags are OR-ed in.

    Note: the ``string`` parameter shadows the ``string`` module inside this
    function only; the name is kept to mirror ``re.match``.
    """
    forced_flags = flags | re.DOTALL | re.MULTILINE
    return _original_match(pattern, string, flags=forced_flags)
class _MultilinePlyara(plyara.Plyara):
    """plyara parser that runs with ``re.match`` swapped for ``_multiline_match``."""

    def parse_string(self, input_string):
        """Parse ``input_string`` while ``re.match`` is temporarily patched.

        The patch is applied to the ``re`` module attribute itself, so every
        ``re.match`` call made while parsing is in progress goes through
        ``_multiline_match``. It is undone when parsing returns or raises.
        """
        # NOTE(review): super() is anchored on plyara.Plyara rather than on
        # this class, so attribute lookup starts *after* plyara.Plyara in the
        # MRO -- confirm this is intended.
        with mock.patch.object(re, 'match', new=_multiline_match):
            parsed = super(plyara.Plyara, self).parse_string(input_string)
        return parsed
class PermissivePlyara():
    """Parser front-end that tries to repair malformed YARA source before giving up.

    ``parse_string`` is the public entry point. Depending on the module-level
    ``PERMISSIVE_MODE`` switch it either delegates to a plain
    ``plyara.Plyara`` instance or goes through the repair-and-retry loop in
    ``_permissive_parse_string``.
    """

    def parse_string(self, input_string):
        """Parse YARA source and return the rules produced by plyara.

        Args:
            input_string: YARA source text.

        Returns:
            Whatever the underlying plyara ``parse_string`` returns. In
            permissive mode, rules that needed a repair carry two extra keys
            (see ``_permissive_parse_string``).

        Raises:
            ParseError: for any failure. Exceptions of other types raised by
                the parser are wrapped so callers only have to catch
                ``ParseError``.
        """
        try:
            if PERMISSIVE_MODE:
                return self._permissive_parse_string(input_string)
            return plyara.Plyara().parse_string(input_string)
        except ParseError:
            # Already the exception type callers expect: let it through as is.
            raise
        # some errors are not properly caught by plyara
        # convert everything to ParseError to avoid uncatchable crashes
        except Exception as e:
            raise ParseError(
                'Uncaught plyara exception ({}): {}'.format(type(e).__name__, str(e)),
                None, None) from e

    def _permissive_parse_string(self, input_string, fix_notes=None, original_error=None):
        """Parse ``input_string``; on ``ParseError`` apply one repair and retry.

        The repair is picked from the text of the parse error. The method then
        calls itself with the repaired source until parsing succeeds or a
        repair leaves the source unchanged.

        Args:
            input_string: YARA source text (possibly already repaired by a
                previous pass).
            fix_notes: set of descriptions of the repairs applied so far;
                ``None`` (or empty) on the first call.
            original_error: the ``ParseError`` raised for the first attempt;
                re-raised when no repair helps.

        Returns:
            The parsed rules. When at least one repair was applied, every rule
            gets ``permissive_plyara_fixed`` set to ``True`` and
            ``permissive_plyara_comment`` set to the joined fix notes.

        Raises:
            ParseError: the error of the first failed attempt, once repairs
                stop changing the source.
        """
        if not fix_notes:
            fix_notes = set()
        try:
            # A fresh parser is instantiated for every attempt: reusing one
            # produced unexplained failures, possibly due to internal state.
            res = _MultilinePlyara().parse_string(input_string)
        except ParseError as e:
            if original_error is None:
                original_error = e
            str_error = str(e)
            fixed = input_string
            if str_error.startswith('Illegal character') \
                    and any(elem in str_error for elem in '\u201d\u201c\u2033'):
                # Typographic quotes (U+201D, U+201C, U+2033) instead of '"'.
                fixed = _fix_quotes(input_string)
                fix_notes.add('Wrong quotes characters')
            elif str_error.startswith('Unknown text Rule'):
                fixed = _fix_capital(input_string)
                fix_notes.add('Rule => rule')
            elif str_error.startswith('Illegal character'):
                fixed = _fix_illegal_chars(input_string)
                fix_notes.add('Illegal characters')
            elif str_error.startswith('Unknown text { for token of type LBRACE') \
                    and input_string.lstrip().startswith('{') \
                    and input_string.rstrip().endswith('}'):
                # Bare "{ ... }" body without any "rule <name>" header.
                fixed = _fix_noname(input_string)
                fix_notes.add('Missing rule name')
            elif re.match(r'Unknown text\s?_\s?for token of type ID', str_error):
                fixed = _fix_spaced_underscores(input_string)
                fix_notes.add("' _ ' => '_'")
            else:
                # No targeted repair matches this error: last-resort attempt.
                fixed = _fix_magic(input_string)
                fix_notes.add('Magic fix (highly unreliable)')
            if fixed != input_string:
                return self._permissive_parse_string(fixed, fix_notes, original_error)
            # The repair changed nothing, so retrying would loop forever:
            # report the error of the very first attempt.
            raise original_error
            # Alternative kept from the original author (reports the best guess):
            # best_error = 'BEST GUESS ERROR: {}\n'.format(str(e))
            # best_guess = 'BEST GUESS: \n{}'.format(input_string)
            # raise ParseError(best_error+best_guess, None, None) from e
        if fix_notes:
            # Sorted so the comment is identical from one run to the next
            # (set iteration order is not stable across interpreter runs).
            comment = '. '.join(sorted(fix_notes))
            for r in res:
                r['permissive_plyara_fixed'] = True
                r['permissive_plyara_comment'] = comment
        return res
def _fix_quotes(yara_src):
    """Replace typographic double quotes in ``yara_src`` with a plain ``"``.

    Handles U+201D (right double quotation mark), U+201C (left double
    quotation mark) and U+2033 (double prime). The characters are written as
    escapes so the source file stays correct even if it is re-encoded.

    Args:
        yara_src: YARA source text.

    Returns:
        The source with the three characters above replaced; any other text is
        returned unchanged.
    """
    quote_table = dict.fromkeys(map(ord, '\u201d\u201c\u2033'), '"')
    return yara_src.translate(quote_table)
def _fix_capital(yara_src):
    """Lower-case a capitalised ``Rule`` keyword in ``yara_src``.

    Only the standalone word ``Rule`` is rewritten. Identifiers that merely
    contain it (``MyRule``, ``Rule_1``, ``RuleSet``) are left alone, so rule
    names are not altered as a side effect of the repair.

    Limitation: a standalone ``Rule`` inside a string or comment is still
    lower-cased, since no YARA-aware tokenising is done here.

    Args:
        yara_src: YARA source text.

    Returns:
        The source with every standalone ``Rule`` replaced by ``rule``.
    """
    return re.sub(r'\bRule\b', 'rule', yara_src)
def _fix_illegal_chars(yara_src):
    """Drop every character of ``yara_src`` that is not in ``string.printable``.

    Args:
        yara_src: YARA source text.

    Returns:
        The source restricted to the characters of ``string.printable``, in
        their original order.
    """
    allowed = string.printable
    return ''.join(ch for ch in yara_src if ch in allowed)
def _fix_noname(yara_src):
    """Prefix ``yara_src`` with a placeholder rule header.

    Args:
        yara_src: YARA source text.

    Returns:
        ``yara_src`` preceded by ``rule UnnamedRule `` (note the trailing
        space); the source itself is not modified.
    """
    header = 'rule UnnamedRule '
    return ''.join((header, yara_src))
def _fix_spaced_underscores(yara_src):
    """Collapse every ``' _ '`` (underscore with a space on each side) into ``'_'``.

    Args:
        yara_src: YARA source text.

    Returns:
        The source with each non-overlapping ``' _ '`` occurrence, scanned
        left to right, replaced by a bare underscore.
    """
    chunks = yara_src.split(' _ ')
    return '_'.join(chunks)
def _fix_magic(yara_src):
    """Last-resort repair: glue the lines of ``yara_src`` back together.

    Lines without ``//`` are concatenated with no separator (presumably to
    undo stray line breaks in the middle of a string or token). Lines that
    contain ``//`` are emitted between two newlines, so a line comment stays
    on a line of its own and cannot swallow the text appended after it.

    Args:
        yara_src: YARA source text.

    Returns:
        The rebuilt source, or ``yara_src`` itself when the rebuild is empty.
    """
    pieces = [
        '\n{}\n'.format(line) if '//' in line else line
        for line in yara_src.splitlines()
    ]
    repaired = ''.join(pieces)
    return repaired if repaired else yara_src
# Keeping this code for later as it contains more advanced fixes
# def _try_simple_repairs(yara_src, error):
# reasons = []
# # common quotes error
# repaired = yara_src
# repaired = repaired.replace('”', '"')
# repaired = repaired.replace('“', '"')
# repaired = repaired.replace('″', '"')
# if repaired != yara_src:
# reasons.append('wrong quotes characters')
# # missing rule declaration
# if repaired.strip().startswith('{'):
# reasons.append('missing rule name')
# rulename = 'UnnamedRule'
# repaired = 'rule {} {}'.format(rulename, repaired)
# # capital letter rule declaration
# if repaired.strip().startswith('Rule'):
# reasons.append('Rule => rule')
# repaired = repaired.replace('Rule', 'rule')
# if 'Illegal character' in str(error):
# repaired = ''.join(filter(lambda x: x in string.printable, repaired))
# reasons.append('illegal characters')
# # badly formatted declaration:
# # check if rule matches format : DATA rule DECLARATION { CONTENT } DATA
# split_source = re.split(r'rule\s(.*?){(.*)}', repaired, flags=re.MULTILINE|re.DOTALL)
# if len(split_source) == 4:
# split_source = {'pre': split_source[0],
# 'declaration': split_source[1].replace(' ', '').rstrip().rstrip(':'),
# 'content': split_source[2],
# 'post': split_source[3]}
# quoted_content = re.split(r'\"(.+?)\"', split_source['content'], re.MULTILINE | re.DOTALL)
# nolinebreak_content = ''
# for chunk in quoted_content: # remove line breaks in strings and meta
# if chunk.startswith('"') and chunk.endswith('"'):
# nolinebreak_content += ''.join(chunk.splitlines())
# reassembled = 'rule {} {{ {} }}'.format(split_source['declaration'], split_source['content'])
# repaired = reassembled
# return repaired, reasons