mirror of https://github.com/MISP/MISP
158 lines
6.3 KiB
Python
158 lines
6.3 KiB
Python
import plyara
|
|
from plyara.exceptions import ParseError
|
|
from plyara import *
|
|
from unittest import mock
|
|
import re
|
|
import string
|
|
|
|
PERMISSIVE_MODE = True # set to False to use the regular, strict, plyara parser for debugging
|
|
|
|
|
|
_original_match = re.match
|
|
def _multiline_match(pattern, string, flags=0):
|
|
return _original_match(pattern, string, flags=flags | re.DOTALL | re.MULTILINE)
|
|
|
|
|
|
class _MultilinePlyara(plyara.Plyara):
|
|
|
|
def parse_string(self, input_string):
|
|
with mock.patch.object(re, 'match', _multiline_match):
|
|
return super(plyara.Plyara, self).parse_string(input_string)
|
|
|
|
|
|
class PermissivePlyara():
|
|
|
|
def parse_string(self, input_string):
|
|
try:
|
|
if PERMISSIVE_MODE:
|
|
return self._permissive_parse_string(input_string)
|
|
else:
|
|
return plyara.Plyara().parse_string(input_string)
|
|
except ParseError as e:
|
|
raise
|
|
# some errors are not properly caught by plyara
|
|
# convert everything to ParseError to avoid uncatchable crashes
|
|
except Exception as e:
|
|
raise ParseError('Uncaught plyara exception ({}): {}'.format(type(e).__name__, str(e)), None, None)
|
|
|
|
def _permissive_parse_string(self, input_string, fix_notes=None, original_error=None):
|
|
if not fix_notes:
|
|
fix_notes = set()
|
|
# with mock.patch.object(re, 'match', overridden_match):
|
|
try:
|
|
# res = super(Plyara, self).parse_string(input_string) # weird failures, couldn't debug. possibly due to internal state
|
|
# re-instanciating playra to avoid internal state errors
|
|
res = _MultilinePlyara().parse_string(input_string)
|
|
if fix_notes:
|
|
for r in res:
|
|
r['permissive_plyara_fixed'] = True
|
|
r['permissive_plyara_comment'] = '. '.join(fix_notes)
|
|
return res
|
|
except ParseError as e:
|
|
if not original_error:
|
|
original_error = e
|
|
str_error = str(e)
|
|
fixed = input_string
|
|
if str_error.startswith('Illegal character') and any(elem in str_error for elem in '”“″'):
|
|
fixed = _fix_quotes(input_string)
|
|
fix_notes.add('Wrong quotes characters')
|
|
elif str_error.startswith('Unknown text Rule'):
|
|
fixed = _fix_capital(input_string)
|
|
fix_notes.add('Rule => rule')
|
|
elif str_error.startswith('Illegal character'):
|
|
fixed = _fix_illegal_chars(input_string)
|
|
fix_notes.add('Illegal characters')
|
|
elif str_error.startswith('Unknown text { for token of type LBRACE') \
|
|
and input_string.lstrip().startswith \
|
|
and input_string.rstrip().endswith('}'):
|
|
fixed = _fix_noname(input_string)
|
|
fix_notes.add('Missing rule name')
|
|
elif re.match(r'Unknown text\s?_\s?for token of type ID', str_error):
|
|
fixed = _fix_spaced_underscores(input_string)
|
|
fix_notes.add("' _ ' => '_'")
|
|
else:
|
|
fixed = _fix_magic(input_string)
|
|
fix_notes.add('Magic fix (highly unreliable)')
|
|
|
|
if fixed != input_string:
|
|
return self._permissive_parse_string(fixed, fix_notes, original_error)
|
|
else:
|
|
raise original_error
|
|
# best_error = 'BEST GUESS ERROR: {}\n'.format(str(e))
|
|
# best_guess = 'BEST GUESS: \n{}'.format(input_string)
|
|
# raise ParseError(best_error+best_guess, None, None) from e
|
|
|
|
|
|
def _fix_quotes(yara_src):
|
|
repaired = yara_src
|
|
repaired = repaired.replace('”', '"')
|
|
repaired = repaired.replace('“', '"')
|
|
repaired = repaired.replace('″', '"')
|
|
return repaired
|
|
|
|
def _fix_capital(yara_src):
|
|
repaired = yara_src.replace('Rule', 'rule')
|
|
return repaired
|
|
|
|
def _fix_illegal_chars(yara_src):
|
|
repaired = ''.join(filter(lambda x: x in string.printable, yara_src))
|
|
return repaired
|
|
|
|
def _fix_noname(yara_src):
|
|
repaired = 'rule UnnamedRule ' + yara_src
|
|
return repaired
|
|
|
|
def _fix_spaced_underscores(yara_src):
|
|
repaired = yara_src.replace(' _ ', '_')
|
|
return repaired
|
|
|
|
def _fix_magic(yara_src):
|
|
repaired = ''
|
|
for line in yara_src.splitlines():
|
|
if '//' not in line:
|
|
repaired += line
|
|
else:
|
|
repaired += '\n{}\n'.format(line)
|
|
return repaired if repaired else yara_src
|
|
|
|
|
|
# Keeping this code for later as it contains more advanced fixes
|
|
|
|
# def _try_simple_repairs(yara_src, error):
|
|
# reasons = []
|
|
# # common quotes error
|
|
# repaired = yara_src
|
|
# repaired = repaired.replace('”', '"')
|
|
# repaired = repaired.replace('“', '"')
|
|
# repaired = repaired.replace('″', '"')
|
|
# if repaired != yara_src:
|
|
# reasons.append('wrong quotes characters')
|
|
# # missing rule declaration
|
|
# if repaired.strip().startswith('{'):
|
|
# reasons.append('missing rule name')
|
|
# rulename = 'UnnamedRule'
|
|
# repaired = 'rule {} {}'.format(rulename, repaired)
|
|
# # capital letter rule declaration
|
|
# if repaired.strip().startswith('Rule'):
|
|
# reasons.append('Rule => rule')
|
|
# repaired = repaired.replace('Rule', 'rule')
|
|
# if 'Illegal character' in str(error):
|
|
# repaired = ''.join(filter(lambda x: x in string.printable, repaired))
|
|
# reasons.append('illegal characters')
|
|
# # badly formated declaration:
|
|
# # check if rule matches format : DATA rule DECLARATION { CONTENT } DATA
|
|
# split_source = re.split(r'rule\s(.*?){(.*)}', repaired, flags=re.MULTILINE|re.DOTALL)
|
|
# if len(split_source) == 4:
|
|
# split_source = {'pre': split_source[0],
|
|
# 'declaration': split_source[1].replace(' ', '').rstrip().rstrip(':'),
|
|
# 'content': split_source[2],
|
|
# 'post': split_source[3]}
|
|
# quoted_content = re.split(r'\"(.+?)\"', split_source['content'], re.MULTILINE | re.DOTALL)
|
|
# nolinebreak_content = ''
|
|
# for chunk in quoted_content: # remove line breaks in strings and meta
|
|
# if chunk.startswith('"') and chunk.endswith('"'):
|
|
# nolinebreak_content += ''.join(chunk.splitlines())
|
|
# reassembled = 'rule {} {{ {} }}'.format(split_source['declaration'], split_source['content'])
|
|
# repaired = reassembled
|
|
# return repaired, reasons
|