chg: Working prototype for user-defined context

pull/86/head
Raphaël Vinot 2020-08-28 18:03:52 +02:00
parent 611b2a8ff5
commit 97b5b2d77f
3 changed files with 102 additions and 29 deletions

View File

@ -184,10 +184,12 @@ class Context():
if filename == 'generic':
for k, type_content in file_content.items():
p.hmset('known_content', {h: type_content['description'] for h in type_content['entries']})
elif filename == 'malicious':
for h, details in file_content.items():
p.sadd('bh|malicious', h)
else:
for mimetype, entry in file_content.items():
for h, details in entry.items():
p.sadd(f'bh|{h}|legitimate', *details['hostnames'])
for h, details in file_content.items():
p.sadd(f'bh|{h}|legitimate', *details['hostnames'])
p.execute()
def find_known_content(self, har2tree_container: Union[CrawledTree, HostNode, URLNode]) -> Dict[str, Union[str, List[str]]]:
@ -228,21 +230,31 @@ class Context():
else:
to_store = {}
for urlnode, h in self._filter(urlnodes, known_content):
if urlnode.mimetype:
mimetype = urlnode.mimetype.split(';')[0]
if mimetype not in to_store:
to_store[mimetype] = {}
if h not in to_store[mimetype]:
to_store[mimetype][h] = {'filenames': set(), 'description': '', 'hostnames': set()}
mimetype = ''
if h != urlnode.body_hash:
# This is the hash of an embedded content, so it won't have a filename but has a different mimetype
# FIXME: this is ugly.
for ressource_mimetype, blobs in urlnode.embedded_ressources.items():
for ressource_h, b in blobs:
if ressource_h == h:
mimetype = ressource_mimetype.split(';')[0]
break
if mimetype:
break
else:
to_store[mimetype][h]['filenames'] = set(to_store[mimetype][h]['filenames'])
to_store[mimetype][h]['hostnames'] = set(to_store[mimetype][h]['hostnames'])
if urlnode.mimetype:
mimetype = urlnode.mimetype.split(';')[0]
if h not in to_store:
to_store[h] = {'filenames': set(), 'description': '', 'hostnames': set(), 'mimetype': mimetype}
else:
to_store[h]['filenames'] = set(to_store[h]['filenames'])
to_store[h]['hostnames'] = set(to_store[h]['hostnames'])
to_store[mimetype][h]['hostnames'].add(urlnode.hostname)
to_store[h]['hostnames'].add(urlnode.hostname)
if urlnode.url_split.path:
filename = Path(urlnode.url_split.path).name
if filename:
to_store[mimetype][h]['filenames'].add(filename)
to_store[h]['filenames'].add(filename)
with open(known_content_file, 'w') as f:
json.dump(to_store, f, indent=2, default=dump_to_json)
@ -258,6 +270,8 @@ class Context():
known_content = self.find_known_content(tree)
pipeline = self.redis.pipeline()
for urlnode, h in self._filter(urlnodes, known_content):
# Note: we can have multiple hashes on the same urlnode (see embedded resources).
# They are expected to be on the same domain as urlnode. This code works as expected.
pipeline.sadd(f'bh|{h}|legitimate', urlnode.hostname)
pipeline.execute()
@ -277,9 +291,37 @@ class Context():
def legitimate_body(self, body_hash: str, legitimate_hostname: str) -> None:
    """Record *legitimate_hostname* as a legitimate host for *body_hash*.

    The hostname is added to the redis set stored under the key
    ``bh|<body_hash>|legitimate``.
    """
    legitimate_key = f'bh|{body_hash}|legitimate'
    self.redis.sadd(legitimate_key, legitimate_hostname)
def malicious_node(self, urlnode: URLNode, known_hashes: Iterable[str]) -> None:
    """Flag every hash that ``self._filter`` yields for *urlnode* as malicious.

    Each hash is added to the redis set ``bh|malicious``; the node half of
    each yielded pair is not used.
    """
    hashes = (body_hash for _, body_hash in self._filter(urlnode, known_hashes))
    for body_hash in hashes:
        self.redis.sadd('bh|malicious', body_hash)
def store_known_malicious_ressource(self, ressource_hash: str, details: Dict[str, str]):
    """Persist the context of a malicious resource in ``known_content/malicious.json``.

    The file (located under ``get_homedir()``) is loaded when it exists,
    the entry for *ressource_hash* is created or updated, and the whole
    mapping is written back.

    :param ressource_hash: hash of the resource being flagged.
    :param details: optional ``'target'`` and ``'type'`` values; ``'type'``
        is stored under the ``'tag'`` key of the entry.
    """
    malicious_file = get_homedir() / 'known_content' / 'malicious.json'
    known = {}
    if malicious_file.exists():
        with open(malicious_file) as f:
            known = json.load(f)

    if ressource_hash in known:
        # Values loaded from JSON are turned back into sets so that
        # additions below stay de-duplicated.
        entry = known[ressource_hash]
        entry['target'] = set(entry['target'])
        entry['tag'] = set(entry['tag'])
    else:
        entry = {'target': set(), 'tag': set()}
        known[ressource_hash] = entry

    for detail_key, stored_key in (('target', 'target'), ('type', 'tag')):
        if detail_key in details:
            entry[stored_key].add(details[detail_key])

    # dump_to_json is handed to json.dump as the fallback serializer
    # (presumably it handles the sets built above -- defined elsewhere).
    with open(malicious_file, 'w') as f:
        json.dump(known, f, indent=2, default=dump_to_json)
def add_malicious(self, ressource_hash: str, details: Dict[str, str]):
    """Flag *ressource_hash* as malicious, on disk and in redis.

    The details are first handed to ``store_known_malicious_ressource``.
    Then, in a single redis pipeline, the hash is added to ``bh|malicious``
    and, when present in *details*, ``'target'`` is added to
    ``<hash>|target`` and ``'type'`` to ``<hash>|tag``.
    """
    self.store_known_malicious_ressource(ressource_hash, details)

    pipeline = self.redis.pipeline()
    pipeline.sadd('bh|malicious', ressource_hash)
    for detail_key, key_suffix in (('target', 'target'), ('type', 'tag')):
        if detail_key in details:
            pipeline.sadd(f'{ressource_hash}|{key_suffix}', details[detail_key])
    pipeline.execute()
# Query DB
@ -291,6 +333,11 @@ class Context():
"""
status: List[Optional[bool]] = []
for urlnode, h in self._filter(urlnode, known_hashes):
# Note: we can have multiple hashes on the same urlnode (see embedded resources).
# They are expected to be on the same domain as urlnode. This code works as expected.
if self.redis.sismember('bh|malicious', h):
# Malicious, no need to go any further
return False
hostnames = self.redis.smembers(f'bh|{h}|legitimate')
if hostnames:
if urlnode.hostname in hostnames:
@ -298,8 +345,6 @@ class Context():
continue
else:
return False # Malicious
elif self.redis.sismember('bh|malicious', h):
return False # Malicious
else:
# NOTE: we do not return here, because we want to return False if *any* of the contents is malicious
status.append(None) # Unknown
@ -323,6 +368,8 @@ class Context():
def legitimacy_details(self, urlnode: URLNode, known_hashes: Iterable[str]) -> Dict[str, Tuple[bool, Optional[List[str]]]]:
to_return = {}
for urlnode, h in self._filter(urlnode, known_hashes):
# Note: we can have multiple hashes on the same urlnode (see embedded resources).
# They are expected to be on the same domain as urlnode. This code works as expected.
hostnames = self.redis.smembers(f'bh|{h}|legitimate')
if hostnames:
if urlnode.hostname in hostnames:
@ -451,6 +498,12 @@ class Lookyloo():
return ct
def add_context(self, capture_uuid: str, urlnode_uuid: str, ressource_hash: str, legitimate: bool, malicious: bool, details: Dict[str, Dict[str, str]]):
    """Forward user-supplied context about *ressource_hash* to ``self.context``.

    :param capture_uuid: not used by this method.
    :param urlnode_uuid: not used by this method.
    :param ressource_hash: hash of the resource being contextualized.
    :param legitimate: when true, ``details['legitimate']`` is passed to
        ``self.context.add_legitimate``.
    :param malicious: when true, ``details['malicious']`` is passed to
        ``self.context.add_malicious``.
    :param details: per-category details, keyed by ``'malicious'`` and
        ``'legitimate'``.
    """
    # The two flags are independent: a hash may be flagged as both.
    if malicious:
        malicious_details = details['malicious']
        self.context.add_malicious(ressource_hash, malicious_details)
    if legitimate:
        legitimate_details = details['legitimate']
        self.context.add_legitimate(ressource_hash, legitimate_details)
def add_to_legitimate(self, capture_uuid: str, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None):
ct = self.get_crawled_tree(capture_uuid)
self.context.mark_as_legitimate(ct, hostnode_uuid, urlnode_uuid)

View File

@ -196,7 +196,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
return render_template('hostname_popup.html',
tree_uuid=tree_uuid,
hostname_uuid=node_uuid,
hostnode_uuid=node_uuid,
hostname=hostnode.name,
urls=urls,
keys_response=keys_response,
@ -506,9 +506,27 @@ def mark_as_legitimate(tree_uuid: str):
@auth.login_required
def add_context(tree_uuid: str, urlnode_uuid: str):
    """Attach the context submitted in the form to a resource hash.

    Reads the hash, the hostnode UUID, the ``legitimate`` / ``malicious``
    flags and their optional detail fields from ``request.form``, hands
    them to ``lookyloo.add_context`` and redirects back to the hostnode
    popup of the same tree.

    :param tree_uuid: UUID of the capture the resource belongs to.
    :param urlnode_uuid: UUID of the URL node carrying the resource.
    """
    context_data = request.form
    ressource_hash = context_data.get('hash_to_contextualize')
    hostnode_uuid = context_data.get('hostnode_uuid')

    # Any non-empty submitted value counts as "checked". Each flag is
    # defined exactly once, as a real bool (no str behind a type: ignore).
    legitimate: bool = bool(context_data.get('legitimate'))
    malicious: bool = bool(context_data.get('malicious'))

    # Both categories are always present so the receiver can index them
    # without checking; empty form fields are simply left out.
    details = {'malicious': {}, 'legitimate': {}}
    if malicious:
        malicious_details = {}
        if context_data.get('malicious_type'):
            malicious_details['type'] = context_data['malicious_type']
        if context_data.get('malicious_target'):
            malicious_details['target'] = context_data['malicious_target']
        details['malicious'] = malicious_details
    if legitimate:
        legitimate_details = {}
        if context_data.get('legitimate_domain'):
            legitimate_details['domain'] = context_data['legitimate_domain']
        if context_data.get('legitimate_description'):
            # NOTE(review): the description is stored under 'target'; a
            # 'description' key may have been intended -- confirm against
            # the consumer (add_legitimate) before changing.
            legitimate_details['target'] = context_data['legitimate_description']
        details['legitimate'] = legitimate_details

    lookyloo.add_context(tree_uuid, urlnode_uuid, ressource_hash, legitimate, malicious, details)
    return redirect(url_for('hostnode_popup', tree_uuid=tree_uuid, node_uuid=hostnode_uuid))
# Query API

View File

@ -62,8 +62,8 @@
{# Headers #}
<center>
<h3>{{ hostname }}</h3>
<button type="button" class="btn btn-info" onclick="whereAmI('{{ hostname_uuid }}')">Locate in tree</button>
<a href="{{ url_for('hostnode_details_text', tree_uuid=tree_uuid, node_uuid=hostname_uuid) }}" class="btn btn-info" role="button">Download URLs as text</a>
<button type="button" class="btn btn-info" onclick="whereAmI('{{ hostnode_uuid }}')">Locate in tree</button>
<a href="{{ url_for('hostnode_details_text', tree_uuid=tree_uuid, node_uuid=hostnode_uuid) }}" class="btn btn-info" role="button">Download URLs as text</a>
</center>
{# Start list of URLs #}
<ul class="list-group-flush">
@ -173,11 +173,11 @@
</div>
<div class="form-group">
<label for="legitimate_domain">Domain serving the file when considered legitimate:</label>
<input type="text" class="form-control" name="legitimate_domain" id=legitimate_domain placeholder="Domain name">
<input type="text" class="form-control" name="legitimate_domain" id="legitimate_domain" placeholder="Domain name">
</div>
<div class="form-group">
<label for="extra_context">Other context for this content (library name, owner, ...):</label>
<input type="text" class="form-control" name="extra_context" id=extra_context placeholder="Context">
<label for="legitimate_description">Other context for this content (library name, owner, ...):</label>
<input type="text" class="form-control" name="legitimate_description" id="legitimate_description" placeholder="Description">
</div>
<div class="form-group">
<div class="form-check">
@ -187,12 +187,14 @@
</div>
<div class="form-group">
<label for="malicious_type">Type of malicious content (phishing, malware, ...):</label>
<input type="text" class="form-control" name="malicious_type" id=malicious_type placeholder="Type of malicious content">
<input type="text" class="form-control" name="malicious_type" id="malicious_type" placeholder="Type of malicious content">
</div>
<div class="form-group">
<label for="malicious_target">Legitimate target of the malicious content (especially for phishing):</label>
<input type="text" class="form-control" name="legitimate_domain" id=legitimate_domain placeholder="Target">
<input type="text" class="form-control" name="malicious_target" id="malicious_target" placeholder="Target">
</div>
<input type="hidden" id="hash_to_contextualize" name="hash_to_contextualize" value="{{url['url_object'].body_hash}}">
<input type="hidden" id="hostnode_uuid" name="hostnode_uuid" value="{{hostnode_uuid}}">
<button type="submit" class="btn btn-primary" id="btn-looking">Submit context</button>
</form>
</div>