From 75cb4d32469fcf9f8e702f16a49f9e1321b3111e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Mon, 16 Mar 2020 13:51:21 +0100 Subject: [PATCH] new: refang urls when needed --- lookyloo/lookyloo.py | 4 + poetry.lock | 254 ++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 + 3 files changed, 256 insertions(+), 4 deletions(-) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index ba803e3..4bdc09a 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -32,6 +32,8 @@ from pysanejs import SaneJS from scrapysplashwrapper import crawl from har2tree import CrawledTree, Har2TreeError, HarFile +from defang import refang # type: ignore + class Lookyloo(): @@ -179,6 +181,8 @@ class Lookyloo(): os: str=None, browser: str=None) -> Union[bool, str]: if not url.startswith('http'): url = f'http://{url}' + else: + url = refang(url) if self.only_global_lookups: splitted_url = urlsplit(url) if splitted_url.netloc: diff --git a/poetry.lock b/poetry.lock index c859353..ac6c997 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,3 +1,12 @@ +[[package]] +category = "dev" +description = "Disable App Nap on OS X 10.9" +marker = "sys_platform == \"darwin\"" +name = "appnope" +optional = false +python-versions = "*" +version = "0.1.0" + [[package]] category = "main" description = "Classes Without Boilerplate" @@ -28,6 +37,14 @@ six = "*" [package.extras] visualize = ["graphviz (>0.5.1)", "Twisted (>=16.1.1)"] +[[package]] +category = "dev" +description = "Specifications for callback functions passed in to an API" +name = "backcall" +optional = false +python-versions = "*" +version = "0.1.0" + [[package]] category = "main" description = "Screen-scraping library" @@ -107,12 +124,21 @@ description = "A Python module to bypass Cloudflare's anti-bot page." name = "cloudscraper" optional = false python-versions = "*" -version = "1.2.26" +version = "1.2.28" [package.dependencies] requests = ">=2.9.2" requests-toolbelt = ">=0.9.1" +[[package]] +category = "dev" +description = "Cross-platform colored terminal text." +marker = "sys_platform == \"win32\"" +name = "colorama" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +version = "0.4.3" + [[package]] category = "main" description = "Symbolic constants in Python" @@ -149,6 +175,22 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "1.1.0" +[[package]] +category = "dev" +description = "Decorators for Humans" +name = "decorator" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*" +version = "4.4.2" + +[[package]] +category = "main" +description = "Defangs and refangs malicious URLs" +name = "defang" +optional = false +python-versions = "*" +version = "0.5.3" + [[package]] category = "main" description = "A Python Environment for (phylogenetic) Tree Exploration" @@ -212,6 +254,7 @@ six = "^1.14.0" reference = "58e054bc35f6489641f2eb1f56ebe9889334fef9" type = "git" url = "https://github.com/viper-framework/har2tree.git" + [[package]] category = "main" description = "A featureful, immutable, and correct URL for Python." @@ -244,6 +287,46 @@ version = "17.5.0" [package.extras] scripts = ["click (>=6.0)", "twisted (>=16.4.0)"] +[[package]] +category = "dev" +description = "IPython: Productive Interactive Computing" +name = "ipython" +optional = false +python-versions = ">=3.6" +version = "7.13.0" + +[package.dependencies] +appnope = "*" +backcall = "*" +colorama = "*" +decorator = "*" +jedi = ">=0.10" +pexpect = "*" +pickleshare = "*" +prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0" +pygments = "*" +setuptools = ">=18.5" +traitlets = ">=4.2" + +[package.extras] +all = ["numpy (>=1.14)", "testpath", "notebook", "nose (>=0.10.1)", "nbconvert", "requests", "ipywidgets", "qtconsole", "ipyparallel", "Sphinx (>=1.3)", "pygments", "nbformat", "ipykernel"] +doc = ["Sphinx (>=1.3)"] +kernel = ["ipykernel"] +nbconvert = ["nbconvert"] +nbformat = ["nbformat"] +notebook = ["notebook", "ipywidgets"] +parallel = ["ipyparallel"] +qtconsole = ["qtconsole"] +test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipykernel", "numpy (>=1.14)"] + +[[package]] +category = "dev" +description = "Vestigial utilities from IPython" +name = "ipython-genutils" +optional = false +python-versions = "*" +version = "0.2.0" + [[package]] category = "main" description = "Various helpers to pass data to untrusted environments and back." @@ -252,6 +335,21 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "1.1.0" +[[package]] +category = "dev" +description = "An autocompletion tool for Python that can be used for text editors." +name = "jedi" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "0.16.0" + +[package.dependencies] +parso = ">=0.5.2" + +[package.extras] +qa = ["flake8 (3.7.9)"] +testing = ["colorama (0.4.1)", "docopt", "pytest (>=3.9.0,<5.0.0)"] + [[package]] category = "main" description = "A very fast and expressive template engine." @@ -329,6 +427,48 @@ w3lib = ">=1.19.0" python = "<3.4.0 || >=3.5.0" version = "*" +[[package]] +category = "dev" +description = "A Python Parser" +name = "parso" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "0.6.2" + +[package.extras] +testing = ["docopt", "pytest (>=3.0.7)"] + +[[package]] +category = "dev" +description = "Pexpect allows easy control of interactive console applications." +marker = "sys_platform != \"win32\"" +name = "pexpect" +optional = false +python-versions = "*" +version = "4.8.0" + +[package.dependencies] +ptyprocess = ">=0.5" + +[[package]] +category = "dev" +description = "Tiny 'shelve'-like database with concurrency support" +name = "pickleshare" +optional = false +python-versions = "*" +version = "0.7.5" + +[[package]] +category = "dev" +description = "Library for building powerful interactive command lines in Python" +name = "prompt-toolkit" +optional = false +python-versions = ">=3.6" +version = "3.0.3" + +[package.dependencies] +wcwidth = "*" + [[package]] category = "main" description = "Pure-Python robots.txt parser with support for modern conventions" @@ -340,6 +480,15 @@ version = "0.1.16" [package.dependencies] six = "*" +[[package]] +category = "dev" +description = "Run a subprocess in a pseudo terminal" +marker = "sys_platform != \"win32\"" +name = "ptyprocess" +optional = false +python-versions = "*" +version = "0.6.0" + [[package]] category = "main" description = "Get a public suffix for a domain name using the Public Suffix List. Forked from and using the same API as the publicsuffix package." @@ -383,6 +532,14 @@ optional = false python-versions = "*" version = "2.0.5" +[[package]] +category = "dev" +description = "Pygments is a syntax highlighting package written in Python." +name = "pygments" +optional = false +python-versions = ">=3.5" +version = "2.6.1" + [[package]] category = "main" description = "Hamcrest framework for matcher objects" @@ -449,6 +606,7 @@ requests = "^2.22.0" reference = "3ea143f44d37ab701c70ffb38408528ddb4e2b6e" type = "git" url = "https://github.com/CIRCL/PySaneJS.git" + [[package]] category = "main" description = "Collection of persistent (disk-based) queues" @@ -548,6 +706,7 @@ scrapy-splash = "^0.7.2" reference = "0bf40df2bfd1c857169300bac21d47311fde1046" type = "git" url = "https://github.com/viper-framework/ScrapySplashWrapper.git" + [[package]] category = "main" description = "Service identity verification for pyOpenSSL & cryptography." @@ -584,6 +743,22 @@ optional = false python-versions = ">=3.5" version = "2.0" +[[package]] +category = "dev" +description = "Traitlets Python config system" +name = "traitlets" +optional = false +python-versions = "*" +version = "4.3.3" + +[package.dependencies] +decorator = "*" +ipython-genutils = "*" +six = "*" + +[package.extras] +test = ["pytest", "mock"] + [[package]] category = "main" description = "An asynchronous networking framework written in Python" @@ -654,6 +829,14 @@ version = "1.21.0" [package.dependencies] six = ">=1.4.1" +[[package]] +category = "dev" +description = "Measures number of Terminal column cells of wide-character codes" +name = "wcwidth" +optional = false +python-versions = "*" +version = "0.1.8" + [[package]] category = "main" description = "The comprehensive WSGI web application library." @@ -683,10 +866,14 @@ test = ["zope.event"] testing = ["zope.event", "nose", "coverage"] [metadata] -content-hash = "8f5e7e8be7e28726125f30d4448d1514570f13de44b2798f0b21dd133a7e2a3b" +content-hash = "2bbaf5f98d9409de1b18511b7cc72632107ed2f17bc6fbb02727e2e67388da7a" python-versions = "^3.6" [metadata.files] +appnope = [ + {file = "appnope-0.1.0-py2.py3-none-any.whl", hash = "sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0"}, + {file = "appnope-0.1.0.tar.gz", hash = "sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71"}, +] attrs = [ {file = "attrs-19.3.0-py2.py3-none-any.whl", hash = "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c"}, {file = "attrs-19.3.0.tar.gz", hash = "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"}, @@ -695,6 +882,10 @@ automat = [ {file = "Automat-20.2.0-py2.py3-none-any.whl", hash = "sha256:b6feb6455337df834f6c9962d6ccf771515b7d939bca142b29c20c2376bc6111"}, {file = "Automat-20.2.0.tar.gz", hash = "sha256:7979803c74610e11ef0c0d68a2942b152df52da55336e0c9d58daf1831cbdf33"}, ] +backcall = [ + {file = "backcall-0.1.0.tar.gz", hash = "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4"}, + {file = "backcall-0.1.0.zip", hash = "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"}, +] beautifulsoup4 = [ {file = "beautifulsoup4-4.8.2-py2-none-any.whl", hash = "sha256:e1505eeed31b0f4ce2dbb3bc8eb256c04cc2b3b72af7d551a4ab6efd5cbe5dae"}, {file = "beautifulsoup4-4.8.2-py3-none-any.whl", hash = "sha256:9fbb4d6e48ecd30bcacc5b63b94088192dcda178513b2ae3c394229f8911b887"}, @@ -772,8 +963,12 @@ click = [ {file = "click-7.1.1.tar.gz", hash = "sha256:8a18b4ea89d8820c5d0c7da8a64b2c324b4dabb695804dbfea19b9be9d88c0cc"}, ] cloudscraper = [ - {file = "cloudscraper-1.2.26-py2.py3-none-any.whl", hash = "sha256:12eebdfab1e76ea82b0ac2bcbe9a53f2d5c7041bb7109d97465f720cecab8f33"}, - {file = "cloudscraper-1.2.26.tar.gz", hash = "sha256:309e237370e5e37d9354874311295bcaf640424771f9f21e21cba91d4276365a"}, + {file = "cloudscraper-1.2.28-py2.py3-none-any.whl", hash = "sha256:ebeb5f9adcb52ed6b763688b05c34264942116b01f8f0b7a5b7de7c1d505cd67"}, + {file = "cloudscraper-1.2.28.tar.gz", hash = "sha256:7a843769aaed50905afeff82c3ebdd1a3a5980bfa5436719f05f0db4af127790"}, +] +colorama = [ + {file = "colorama-0.4.3-py2.py3-none-any.whl", hash = "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff"}, + {file = "colorama-0.4.3.tar.gz", hash = "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1"}, ] constantly = [ {file = "constantly-15.1.0-py2.py3-none-any.whl", hash = "sha256:dd2fa9d6b1a51a83f0d7dd76293d734046aa176e384bf6e33b7e44880eb37c5d"}, @@ -806,6 +1001,13 @@ cssselect = [ {file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"}, {file = "cssselect-1.1.0.tar.gz", hash = "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"}, ] +decorator = [ + {file = "decorator-4.4.2-py2.py3-none-any.whl", hash = "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760"}, + {file = "decorator-4.4.2.tar.gz", hash = "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7"}, +] +defang = [ + {file = "defang-0.5.3.tar.gz", hash = "sha256:86aeff658d7cd4c3b61d16089872e1c1f0a1b7b3c64d4ca9525c017caeb284d7"}, +] ete3 = [ {file = "ete3-3.1.1.tar.gz", hash = "sha256:870a3d4b496a36fbda4b13c7c6b9dfa7638384539ae93551ec7acb377fb9c385"}, ] @@ -830,10 +1032,22 @@ incremental = [ {file = "incremental-17.5.0-py2.py3-none-any.whl", hash = "sha256:717e12246dddf231a349175f48d74d93e2897244939173b01974ab6661406b9f"}, {file = "incremental-17.5.0.tar.gz", hash = "sha256:7b751696aaf36eebfab537e458929e194460051ccad279c72b755a167eebd4b3"}, ] +ipython = [ + {file = "ipython-7.13.0-py3-none-any.whl", hash = "sha256:eb8d075de37f678424527b5ef6ea23f7b80240ca031c2dd6de5879d687a65333"}, + {file = "ipython-7.13.0.tar.gz", hash = "sha256:ca478e52ae1f88da0102360e57e528b92f3ae4316aabac80a2cd7f7ab2efb48a"}, +] +ipython-genutils = [ + {file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"}, + {file = "ipython_genutils-0.2.0.tar.gz", hash = "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"}, +] itsdangerous = [ {file = "itsdangerous-1.1.0-py2.py3-none-any.whl", hash = "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749"}, {file = "itsdangerous-1.1.0.tar.gz", hash = "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19"}, ] +jedi = [ + {file = "jedi-0.16.0-py2.py3-none-any.whl", hash = "sha256:b4f4052551025c6b0b0b193b29a6ff7bdb74c52450631206c262aef9f7159ad2"}, + {file = "jedi-0.16.0.tar.gz", hash = "sha256:d5c871cb9360b414f981e7072c52c33258d598305280fef91c6cae34739d65d5"}, +] jinja2 = [ {file = "Jinja2-2.11.1-py2.py3-none-any.whl", hash = "sha256:b0eaf100007721b5c16c1fc1eecb87409464edc10469ddc9a22a27a99123be49"}, {file = "Jinja2-2.11.1.tar.gz", hash = "sha256:93187ffbc7808079673ef52771baa950426fd664d3aad1d0fa3e95644360e250"}, @@ -921,9 +1135,29 @@ parsel = [ {file = "parsel-1.5.2-py2.py3-none-any.whl", hash = "sha256:74f8e9d3b345b14cb1416bd777a03982cde33a74d8b32e0c71e651d07d41d40a"}, {file = "parsel-1.5.2.tar.gz", hash = "sha256:4da4262ba4605573b6b72a5f557616a2fc9dee7a47a1efad562752a28d366723"}, ] +parso = [ + {file = "parso-0.6.2-py2.py3-none-any.whl", hash = "sha256:8515fc12cfca6ee3aa59138741fc5624d62340c97e401c74875769948d4f2995"}, + {file = "parso-0.6.2.tar.gz", hash = "sha256:0c5659e0c6eba20636f99a04f469798dca8da279645ce5c387315b2c23912157"}, +] +pexpect = [ + {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, + {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, +] +pickleshare = [ + {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, + {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, +] +prompt-toolkit = [ + {file = "prompt_toolkit-3.0.3-py3-none-any.whl", hash = "sha256:c93e53af97f630f12f5f62a3274e79527936ed466f038953dfa379d4941f651a"}, + {file = "prompt_toolkit-3.0.3.tar.gz", hash = "sha256:a402e9bf468b63314e37460b68ba68243d55b2f8c4d0192f85a019af3945050e"}, +] protego = [ {file = "Protego-0.1.16.tar.gz", hash = "sha256:a682771bc7b51b2ff41466460896c1a5a653f9a1e71639ef365a72e66d8734b4"}, ] +ptyprocess = [ + {file = "ptyprocess-0.6.0-py2.py3-none-any.whl", hash = "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"}, + {file = "ptyprocess-0.6.0.tar.gz", hash = "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0"}, +] publicsuffix2 = [ {file = "publicsuffix2-2.20191221-py2.py3-none-any.whl", hash = "sha256:786b5e36205b88758bd3518725ec8cfe7a8173f5269354641f581c6b80a99893"}, {file = "publicsuffix2-2.20191221.tar.gz", hash = "sha256:00f8cc31aa8d0d5592a5ced19cccba7de428ebca985db26ac852d920ddd6fe7b"}, @@ -966,6 +1200,10 @@ pydispatcher = [ {file = "PyDispatcher-2.0.5.tar.gz", hash = "sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf"}, {file = "PyDispatcher-2.0.5.zip", hash = "sha256:5be4a8be12805ef7d712dd9a93284fb8bc53f309867e573f653a72e5fd10e433"}, ] +pygments = [ + {file = "Pygments-2.6.1-py3-none-any.whl", hash = "sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324"}, + {file = "Pygments-2.6.1.tar.gz", hash = "sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44"}, +] pyhamcrest = [ {file = "PyHamcrest-2.0.2-py3-none-any.whl", hash = "sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"}, {file = "PyHamcrest-2.0.2.tar.gz", hash = "sha256:412e00137858f04bde0729913874a48485665f2d36fe9ee449f26be864af9316"}, @@ -1016,6 +1254,10 @@ soupsieve = [ {file = "soupsieve-2.0-py2.py3-none-any.whl", hash = "sha256:fcd71e08c0aee99aca1b73f45478549ee7e7fc006d51b37bec9e9def7dc22b69"}, {file = "soupsieve-2.0.tar.gz", hash = "sha256:e914534802d7ffd233242b785229d5ba0766a7f487385e3f714446a07bf540ae"}, ] +traitlets = [ + {file = "traitlets-4.3.3-py2.py3-none-any.whl", hash = "sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44"}, + {file = "traitlets-4.3.3.tar.gz", hash = "sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7"}, +] twisted = [ {file = "Twisted-19.10.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:257dbc78e72bc69c2970035fc74df54b04573d5ddd380251a8a23f74d619db03"}, {file = "Twisted-19.10.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:a1de7598ce977943b3edbcc0a7d2112f134cc1b98b2fd7e348ee9e0bef092e50"}, @@ -1073,6 +1315,10 @@ w3lib = [ {file = "w3lib-1.21.0-py2.py3-none-any.whl", hash = "sha256:847704b837b2b973cddef6938325d466628e6078266bc2e1f7ac49ba85c34823"}, {file = "w3lib-1.21.0.tar.gz", hash = "sha256:8b1854fef570b5a5fc84d960e025debd110485d73fd283580376104762774315"}, ] +wcwidth = [ + {file = "wcwidth-0.1.8-py2.py3-none-any.whl", hash = "sha256:8fd29383f539be45b20bd4df0dc29c20ba48654a41e661925e612311e9f3c603"}, + {file = "wcwidth-0.1.8.tar.gz", hash = "sha256:f28b3e8a6483e5d49e7f8949ac1a78314e740333ae305b4ba5defd3e74fb37a8"}, +] werkzeug = [ {file = "Werkzeug-1.0.0-py2.py3-none-any.whl", hash = "sha256:6dc65cf9091cf750012f56f2cad759fa9e879f511b5ff8685e456b4e3bf90d16"}, {file = "Werkzeug-1.0.0.tar.gz", hash = "sha256:169ba8a33788476292d04186ab33b01d6add475033dfc07215e6d219cc077096"}, diff --git a/pyproject.toml b/pyproject.toml index 633785b..0f5110f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,9 +42,11 @@ pylookyloo = {path = "client"} beautifulsoup4 = "^4.8.2" bootstrap-flask = "^1.2.0" cloudscraper = "^1.2.20" +defang = "^0.5.3" [tool.poetry.dev-dependencies] mypy = "^0.761" +ipython = "^7.13.0" [build-system] requires = ["poetry>=0.12"]