mirror of https://github.com/CIRCL/lookyloo
chg: Working prototype for user-defined context
parent
611b2a8ff5
commit
97b5b2d77f
|
@ -184,10 +184,12 @@ class Context():
|
|||
if filename == 'generic':
|
||||
for k, type_content in file_content.items():
|
||||
p.hmset('known_content', {h: type_content['description'] for h in type_content['entries']})
|
||||
elif filename == 'malicious':
|
||||
for h, details in file_content.items():
|
||||
p.sadd('bh|malicious', h)
|
||||
else:
|
||||
for mimetype, entry in file_content.items():
|
||||
for h, details in entry.items():
|
||||
p.sadd(f'bh|{h}|legitimate', *details['hostnames'])
|
||||
for h, details in file_content.items():
|
||||
p.sadd(f'bh|{h}|legitimate', *details['hostnames'])
|
||||
p.execute()
|
||||
|
||||
def find_known_content(self, har2tree_container: Union[CrawledTree, HostNode, URLNode]) -> Dict[str, Union[str, List[str]]]:
|
||||
|
@ -228,21 +230,31 @@ class Context():
|
|||
else:
|
||||
to_store = {}
|
||||
for urlnode, h in self._filter(urlnodes, known_content):
|
||||
if urlnode.mimetype:
|
||||
mimetype = urlnode.mimetype.split(';')[0]
|
||||
if mimetype not in to_store:
|
||||
to_store[mimetype] = {}
|
||||
if h not in to_store[mimetype]:
|
||||
to_store[mimetype][h] = {'filenames': set(), 'description': '', 'hostnames': set()}
|
||||
mimetype = ''
|
||||
if h != urlnode.body_hash:
|
||||
# this is the hash of an embeded content so it won't have a filename but has a different mimetype
|
||||
# FIXME: this is ugly.
|
||||
for ressource_mimetype, blobs in urlnode.embedded_ressources.items():
|
||||
for ressource_h, b in blobs:
|
||||
if ressource_h == h:
|
||||
mimetype = ressource_mimetype.split(';')[0]
|
||||
break
|
||||
if mimetype:
|
||||
break
|
||||
else:
|
||||
to_store[mimetype][h]['filenames'] = set(to_store[mimetype][h]['filenames'])
|
||||
to_store[mimetype][h]['hostnames'] = set(to_store[mimetype][h]['hostnames'])
|
||||
if urlnode.mimetype:
|
||||
mimetype = urlnode.mimetype.split(';')[0]
|
||||
if h not in to_store:
|
||||
to_store[h] = {'filenames': set(), 'description': '', 'hostnames': set(), 'mimetype': mimetype}
|
||||
else:
|
||||
to_store[h]['filenames'] = set(to_store[h]['filenames'])
|
||||
to_store[h]['hostnames'] = set(to_store[h]['hostnames'])
|
||||
|
||||
to_store[mimetype][h]['hostnames'].add(urlnode.hostname)
|
||||
to_store[h]['hostnames'].add(urlnode.hostname)
|
||||
if urlnode.url_split.path:
|
||||
filename = Path(urlnode.url_split.path).name
|
||||
if filename:
|
||||
to_store[mimetype][h]['filenames'].add(filename)
|
||||
to_store[h]['filenames'].add(filename)
|
||||
|
||||
with open(known_content_file, 'w') as f:
|
||||
json.dump(to_store, f, indent=2, default=dump_to_json)
|
||||
|
@ -258,6 +270,8 @@ class Context():
|
|||
known_content = self.find_known_content(tree)
|
||||
pipeline = self.redis.pipeline()
|
||||
for urlnode, h in self._filter(urlnodes, known_content):
|
||||
# Note: we can have multiple hahes on the same urlnode (see embedded resources).
|
||||
# They are expected to be on the same domain as urlnode. This code work as expected.
|
||||
pipeline.sadd(f'bh|{h}|legitimate', urlnode.hostname)
|
||||
pipeline.execute()
|
||||
|
||||
|
@ -277,9 +291,37 @@ class Context():
|
|||
def legitimate_body(self, body_hash: str, legitimate_hostname: str) -> None:
|
||||
self.redis.sadd(f'bh|{body_hash}|legitimate', legitimate_hostname)
|
||||
|
||||
def malicious_node(self, urlnode: URLNode, known_hashes: Iterable[str]) -> None:
|
||||
for _, h in self._filter(urlnode, known_hashes):
|
||||
self.redis.sadd('bh|malicious', h)
|
||||
def store_known_malicious_ressource(self, ressource_hash: str, details: Dict[str, str]):
|
||||
known_malicious_ressource_file = get_homedir() / 'known_content' / 'malicious.json'
|
||||
if known_malicious_ressource_file.exists():
|
||||
with open(known_malicious_ressource_file) as f:
|
||||
to_store = json.load(f)
|
||||
else:
|
||||
to_store = {}
|
||||
|
||||
if ressource_hash not in to_store:
|
||||
to_store[ressource_hash] = {'target': set(), 'tag': set()}
|
||||
else:
|
||||
to_store[ressource_hash]['target'] = set(to_store[ressource_hash]['target'])
|
||||
to_store[ressource_hash]['tag'] = set(to_store[ressource_hash]['tag'])
|
||||
|
||||
if 'target' in details:
|
||||
to_store[ressource_hash]['target'].add(details['target'])
|
||||
if 'type' in details:
|
||||
to_store[ressource_hash]['tag'].add(details['type'])
|
||||
|
||||
with open(known_malicious_ressource_file, 'w') as f:
|
||||
json.dump(to_store, f, indent=2, default=dump_to_json)
|
||||
|
||||
def add_malicious(self, ressource_hash: str, details: Dict[str, str]):
|
||||
self.store_known_malicious_ressource(ressource_hash, details)
|
||||
p = self.redis.pipeline()
|
||||
p.sadd('bh|malicious', ressource_hash)
|
||||
if 'target' in details:
|
||||
p.sadd(f'{ressource_hash}|target', details['target'])
|
||||
if 'type' in details:
|
||||
p.sadd(f'{ressource_hash}|tag', details['type'])
|
||||
p.execute()
|
||||
|
||||
# Query DB
|
||||
|
||||
|
@ -291,6 +333,11 @@ class Context():
|
|||
"""
|
||||
status: List[Optional[bool]] = []
|
||||
for urlnode, h in self._filter(urlnode, known_hashes):
|
||||
# Note: we can have multiple hahes on the same urlnode (see embedded resources).
|
||||
# They are expected to be on the same domain as urlnode. This code work as expected.
|
||||
if self.redis.sismember('bh|malicious', h):
|
||||
# Malicious, no need to go any further
|
||||
return False
|
||||
hostnames = self.redis.smembers(f'bh|{h}|legitimate')
|
||||
if hostnames:
|
||||
if urlnode.hostname in hostnames:
|
||||
|
@ -298,8 +345,6 @@ class Context():
|
|||
continue
|
||||
else:
|
||||
return False # Malicious
|
||||
elif self.redis.sismember('bh|malicious', h):
|
||||
return False # Malicious
|
||||
else:
|
||||
# NOTE: we do not return here, because we want to return False if *any* of the contents is malicious
|
||||
status.append(None) # Unknown
|
||||
|
@ -323,6 +368,8 @@ class Context():
|
|||
def legitimacy_details(self, urlnode: URLNode, known_hashes: Iterable[str]) -> Dict[str, Tuple[bool, Optional[List[str]]]]:
|
||||
to_return = {}
|
||||
for urlnode, h in self._filter(urlnode, known_hashes):
|
||||
# Note: we can have multiple hahes on the same urlnode (see embedded resources).
|
||||
# They are expected to be on the same domain as urlnode. This code work as expected.
|
||||
hostnames = self.redis.smembers(f'bh|{h}|legitimate')
|
||||
if hostnames:
|
||||
if urlnode.hostname in hostnames:
|
||||
|
@ -451,6 +498,12 @@ class Lookyloo():
|
|||
|
||||
return ct
|
||||
|
||||
def add_context(self, capture_uuid: str, urlnode_uuid: str, ressource_hash: str, legitimate: bool, malicious: bool, details: Dict[str, Dict[str, str]]):
|
||||
if malicious:
|
||||
self.context.add_malicious(ressource_hash, details['malicious'])
|
||||
if legitimate:
|
||||
self.context.add_legitimate(ressource_hash, details['legitimate'])
|
||||
|
||||
def add_to_legitimate(self, capture_uuid: str, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None):
|
||||
ct = self.get_crawled_tree(capture_uuid)
|
||||
self.context.mark_as_legitimate(ct, hostnode_uuid, urlnode_uuid)
|
||||
|
|
|
@ -196,7 +196,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
|
|||
|
||||
return render_template('hostname_popup.html',
|
||||
tree_uuid=tree_uuid,
|
||||
hostname_uuid=node_uuid,
|
||||
hostnode_uuid=node_uuid,
|
||||
hostname=hostnode.name,
|
||||
urls=urls,
|
||||
keys_response=keys_response,
|
||||
|
@ -506,9 +506,27 @@ def mark_as_legitimate(tree_uuid: str):
|
|||
@auth.login_required
|
||||
def add_context(tree_uuid: str, urlnode_uuid: str):
|
||||
context_data = request.form
|
||||
legitimate: bool = context_data.get('legitimate') if context_data.get('legitimate') else False # type: ignore
|
||||
malicious: bool = context_data.get('malicious') if context_data.get('malicious') else False # type: ignore
|
||||
|
||||
ressource_hash = context_data.get('hash_to_contextualize')
|
||||
hostnode_uuid = context_data.get('hostnode_uuid')
|
||||
legitimate: bool = True if context_data.get('legitimate') else False
|
||||
malicious: bool = True if context_data.get('malicious') else False
|
||||
details = {'malicious': {}, 'legitimate': {}}
|
||||
if malicious:
|
||||
malicious_details = {}
|
||||
if context_data.get('malicious_type'):
|
||||
malicious_details['type'] = context_data['malicious_type']
|
||||
if context_data.get('malicious_target'):
|
||||
malicious_details['target'] = context_data['malicious_target']
|
||||
details['malicious'] = malicious_details
|
||||
if legitimate:
|
||||
legitimate_details = {}
|
||||
if context_data.get('legitimate_domain'):
|
||||
legitimate_details['domain'] = context_data['legitimate_domain']
|
||||
if context_data.get('legitimate_description'):
|
||||
legitimate_details['target'] = context_data['legitimate_description']
|
||||
details['legitimate'] = legitimate_details
|
||||
lookyloo.add_context(tree_uuid, urlnode_uuid, ressource_hash, legitimate, malicious, details)
|
||||
return redirect(url_for('hostnode_popup', tree_uuid=tree_uuid, node_uuid=hostnode_uuid))
|
||||
|
||||
|
||||
# Query API
|
||||
|
|
|
@ -62,8 +62,8 @@
|
|||
{# Headers #}
|
||||
<center>
|
||||
<h3>{{ hostname }}</h3>
|
||||
<button type="button" class="btn btn-info" onclick="whereAmI('{{ hostname_uuid }}')">Locate in tree</button>
|
||||
<a href="{{ url_for('hostnode_details_text', tree_uuid=tree_uuid, node_uuid=hostname_uuid) }}" class="btn btn-info" role="button">Download URLs as text</a>
|
||||
<button type="button" class="btn btn-info" onclick="whereAmI('{{ hostode_uuid }}')">Locate in tree</button>
|
||||
<a href="{{ url_for('hostnode_details_text', tree_uuid=tree_uuid, node_uuid=hostnode_uuid) }}" class="btn btn-info" role="button">Download URLs as text</a>
|
||||
</center>
|
||||
{# Start list of URLs #}
|
||||
<ul class="list-group-flush">
|
||||
|
@ -173,11 +173,11 @@
|
|||
</div>
|
||||
<div class="form-group">
|
||||
<label for="legitimate_domain">Domain serving the file when considered legitimate:</label>
|
||||
<input type="text" class="form-control" name="legitimate_domain" id=legitimate_domain placeholder="Domain name">
|
||||
<input type="text" class="form-control" name="legitimate_domain" id="legitimate_domain" placeholder="Domain name">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="extra_context">Other context for this content (library name, owner, ...):</label>
|
||||
<input type="text" class="form-control" name="extra_context" id=extra_context placeholder="Context">
|
||||
<label for="legitimate_description">Other context for this content (library name, owner, ...):</label>
|
||||
<input type="text" class="form-control" name="legitimate_description" id="legitimate_description" placeholder="Description">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-check">
|
||||
|
@ -187,12 +187,14 @@
|
|||
</div>
|
||||
<div class="form-group">
|
||||
<label for="malicious_type">Type of malicious content (phishing, malware, ...):</label>
|
||||
<input type="text" class="form-control" name="malicious_type" id=malicious_type placeholder="Type of malicious content">
|
||||
<input type="text" class="form-control" name="malicious_type" id="malicious_type" placeholder="Type of malicious content">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="malicious_target">Legitimate target of the malicious content (expecially for phishing):</label>
|
||||
<input type="text" class="form-control" name="legitimate_domain" id=legitimate_domain placeholder="Target">
|
||||
<input type="text" class="form-control" name="malicious_target" id="malicious_target" placeholder="Target">
|
||||
</div>
|
||||
<input type="hidden" id="hash_to_contextualize" name="hash_to_contextualize" value="{{url['url_object'].body_hash}}">
|
||||
<input type="hidden" id="hostnode_uuid" name="hostnode_uuid" value="{{hostnode_uuid}}">
|
||||
<button type="submit" class="btn btn-primary" id="btn-looking">Submit context</button>
|
||||
</form>
|
||||
</div>
|
||||
|
|
Loading…
Reference in New Issue