From b73a3b75330c54c936fd906babfab1db7bd46d2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 8 Dec 2020 14:59:34 +0100 Subject: [PATCH] chg: Add attachments to MISP export --- lookyloo/lookyloo.py | 23 ++++++++++++++++++++++- poetry.lock | 18 +++++++++--------- pyproject.toml | 2 +- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 59219ccc..f8c68cf4 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -25,7 +25,7 @@ import dns.resolver import dns.rdatatype from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode from pymisp import MISPEvent -from pymisp.tools import URLObject +from pymisp.tools import URLObject, FileObject from redis import Redis from scrapysplashwrapper import crawl from werkzeug.useragents import UserAgent @@ -888,19 +888,40 @@ class Lookyloo(): if cache['incomplete_redirects']: self.cache_tree(capture_uuid) cache = self.capture_cache(capture_uuid) + + capture_dir = self.lookup_capture_dir(capture_uuid) + if not capture_dir: + raise MissingUUID(f'Unable to find {capture_uuid}') + + ct = load_pickle_tree(capture_dir) + if not ct: + raise MissingUUID(f'Unable to find {capture_dir}') + event = MISPEvent() event.info = f'Lookyloo Capture ({cache["url"]})' event.add_attribute('link', f'https://{self.public_domain}/tree/{capture_uuid}') + initial_url = URLObject(cache["url"]) # type: ignore redirects = [URLObject(url) for url in cache['redirects']] # type: ignore + initial_url.add_reference(redirects[0], 'redirects-to') prec_object = redirects[0] for u_object in redirects[1:]: prec_object.add_reference(u_object, 'redirects-to') prec_object = u_object + event.add_object(initial_url) for u_object in redirects: event.add_object(u_object) + + event.add_attribute('attachment', 'screenshot_landing_page.png', data=self.get_screenshot(capture_uuid)) + try: + fo = FileObject(pseudofile=ct.root_hartree.rendered_node.body, 
filename='body_response.html') + fo.comment = 'Content received for the final redirect (before rendering)' + fo.add_reference(event.objects[-1], 'loaded-by', 'URL loading that content') + event.add_object(fo) + except Har2TreeError: + pass return event def get_hashes(self, tree_uuid: str, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]: diff --git a/poetry.lock b/poetry.lock index 26383a65..3b8c4c31 100644 --- a/poetry.lock +++ b/poetry.lock @@ -306,20 +306,20 @@ tornado = ["tornado (>=0.2)"] [[package]] name = "har2tree" -version = "1.2.10" +version = "1.2.11" description = "HTTP Archive (HAR) to ETE Toolkit generator" category = "main" optional = false python-versions = ">=3.7,<4.0" [package.dependencies] -beautifulsoup4 = ">=4.8.2,<5.0.0" -ete3 = ">=3.1.1,<4.0.0" +beautifulsoup4 = ">=4.9.3,<5.0.0" +ete3 = ">=3.1.2,<4.0.0" filetype = ">=1.0.7,<2.0.0" -lxml = ">=4.4.2,<5.0.0" -numpy = ">=1.19.1,<2.0.0" +lxml = ">=4.6.2,<5.0.0" +numpy = ">=1.19.4,<2.0.0" publicsuffix2 = ">=2.20191221,<3.0" -six = ">=1.14.0,<2.0.0" +six = ">=1.15.0,<2.0.0" [[package]] name = "hyperlink" @@ -1108,7 +1108,7 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "2d239c1ccb0516874bfa976bbf238f8674167185c7a513df9aa1f35998eda50b" +content-hash = "fdf041a3e80d5489ff1d4a2c8e8ed8dca5ce986007eaa7494888d8ca934075ff" [metadata.files] aiohttp = [ @@ -1332,8 +1332,8 @@ gunicorn = [ {file = "gunicorn-20.0.4.tar.gz", hash = "sha256:1904bb2b8a43658807108d59c3f3d56c2b6121a701161de0ddf9ad140073c626"}, ] har2tree = [ - {file = "har2tree-1.2.10-py3-none-any.whl", hash = "sha256:6c879c2a351bb1f90d5e3577571f19779e0dc39eca0f29383647ae18d9d1157e"}, - {file = "har2tree-1.2.10.tar.gz", hash = "sha256:a0f8e6124d266ecde69c949c39afb83cb4f18b3accc3ebd045cae1832b56962c"}, + {file = "har2tree-1.2.11-py3-none-any.whl", hash = "sha256:49c3ac72e67208be4c8028a81ccfba34c0cfeeace85fdd35bd214fbffdb5f416"}, 
+ {file = "har2tree-1.2.11.tar.gz", hash = "sha256:450e5ac6e720662fc2b925bdf5126ea285230bed1f231b1aabc272f66f4d1151"}, ] hyperlink = [ {file = "hyperlink-20.0.1-py2.py3-none-any.whl", hash = "sha256:c528d405766f15a2c536230de7e160b65a08e20264d8891b3eb03307b0df3c63"}, diff --git a/pyproject.toml b/pyproject.toml index cb602267..26da2f8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ Flask-HTTPAuth = "^4.2.0" pyeupi = "^1.0" scrapysplashwrapper = "^1.2.8" pysanejs = "^1.3" -har2tree = "^1.2.10" +har2tree = "^1.2.11" pylookyloo = "^1.2" dnspython = "^2.0.0" pytaxonomies = "^1.3"