new: refang urls when needed

pull/67/head
Raphaël Vinot 2020-03-16 13:51:21 +01:00
parent 6beb1c39eb
commit 75cb4d3246
3 changed files with 256 additions and 4 deletions

View File

@ -32,6 +32,8 @@ from pysanejs import SaneJS
from scrapysplashwrapper import crawl
from har2tree import CrawledTree, Har2TreeError, HarFile
from defang import refang # type: ignore
class Lookyloo():
@ -179,6 +181,8 @@ class Lookyloo():
os: str=None, browser: str=None) -> Union[bool, str]:
# Refang before looking at the scheme. Defanged input (e.g. "hxxp://example[.]com"
# or "example[.]com") does not start with "http", so testing the prefix first
# would prepend "http://" to the still-defanged string and skip refang entirely.
# NOTE(review): relies on refang() leaving an already-clean URL unchanged -
# confirm against the defang package documentation.
url = refang(url)
if not url.startswith('http'):
    # No (refanged) http/https scheme present: default to plain http.
    url = f'http://{url}'
if self.only_global_lookups:
splitted_url = urlsplit(url)
if splitted_url.netloc:

254
poetry.lock generated
View File

@ -1,3 +1,12 @@
[[package]]
category = "dev"
description = "Disable App Nap on OS X 10.9"
marker = "sys_platform == \"darwin\""
name = "appnope"
optional = false
python-versions = "*"
version = "0.1.0"
[[package]]
category = "main"
description = "Classes Without Boilerplate"
@ -28,6 +37,14 @@ six = "*"
[package.extras]
visualize = ["graphviz (>0.5.1)", "Twisted (>=16.1.1)"]
[[package]]
category = "dev"
description = "Specifications for callback functions passed in to an API"
name = "backcall"
optional = false
python-versions = "*"
version = "0.1.0"
[[package]]
category = "main"
description = "Screen-scraping library"
@ -107,12 +124,21 @@ description = "A Python module to bypass Cloudflare's anti-bot page."
name = "cloudscraper"
optional = false
python-versions = "*"
version = "1.2.26"
version = "1.2.28"
[package.dependencies]
requests = ">=2.9.2"
requests-toolbelt = ">=0.9.1"
[[package]]
category = "dev"
description = "Cross-platform colored terminal text."
marker = "sys_platform == \"win32\""
name = "colorama"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
version = "0.4.3"
[[package]]
category = "main"
description = "Symbolic constants in Python"
@ -149,6 +175,22 @@ optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "1.1.0"
[[package]]
category = "dev"
description = "Decorators for Humans"
name = "decorator"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*"
version = "4.4.2"
[[package]]
category = "main"
description = "Defangs and refangs malicious URLs"
name = "defang"
optional = false
python-versions = "*"
version = "0.5.3"
[[package]]
category = "main"
description = "A Python Environment for (phylogenetic) Tree Exploration"
@ -212,6 +254,7 @@ six = "^1.14.0"
reference = "58e054bc35f6489641f2eb1f56ebe9889334fef9"
type = "git"
url = "https://github.com/viper-framework/har2tree.git"
[[package]]
category = "main"
description = "A featureful, immutable, and correct URL for Python."
@ -244,6 +287,46 @@ version = "17.5.0"
[package.extras]
scripts = ["click (>=6.0)", "twisted (>=16.4.0)"]
[[package]]
category = "dev"
description = "IPython: Productive Interactive Computing"
name = "ipython"
optional = false
python-versions = ">=3.6"
version = "7.13.0"
[package.dependencies]
appnope = "*"
backcall = "*"
colorama = "*"
decorator = "*"
jedi = ">=0.10"
pexpect = "*"
pickleshare = "*"
prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0"
pygments = "*"
setuptools = ">=18.5"
traitlets = ">=4.2"
[package.extras]
all = ["numpy (>=1.14)", "testpath", "notebook", "nose (>=0.10.1)", "nbconvert", "requests", "ipywidgets", "qtconsole", "ipyparallel", "Sphinx (>=1.3)", "pygments", "nbformat", "ipykernel"]
doc = ["Sphinx (>=1.3)"]
kernel = ["ipykernel"]
nbconvert = ["nbconvert"]
nbformat = ["nbformat"]
notebook = ["notebook", "ipywidgets"]
parallel = ["ipyparallel"]
qtconsole = ["qtconsole"]
test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipykernel", "numpy (>=1.14)"]
[[package]]
category = "dev"
description = "Vestigial utilities from IPython"
name = "ipython-genutils"
optional = false
python-versions = "*"
version = "0.2.0"
[[package]]
category = "main"
description = "Various helpers to pass data to untrusted environments and back."
@ -252,6 +335,21 @@ optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "1.1.0"
[[package]]
category = "dev"
description = "An autocompletion tool for Python that can be used for text editors."
name = "jedi"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "0.16.0"
[package.dependencies]
parso = ">=0.5.2"
[package.extras]
qa = ["flake8 (3.7.9)"]
testing = ["colorama (0.4.1)", "docopt", "pytest (>=3.9.0,<5.0.0)"]
[[package]]
category = "main"
description = "A very fast and expressive template engine."
@ -329,6 +427,48 @@ w3lib = ">=1.19.0"
python = "<3.4.0 || >=3.5.0"
version = "*"
[[package]]
category = "dev"
description = "A Python Parser"
name = "parso"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "0.6.2"
[package.extras]
testing = ["docopt", "pytest (>=3.0.7)"]
[[package]]
category = "dev"
description = "Pexpect allows easy control of interactive console applications."
marker = "sys_platform != \"win32\""
name = "pexpect"
optional = false
python-versions = "*"
version = "4.8.0"
[package.dependencies]
ptyprocess = ">=0.5"
[[package]]
category = "dev"
description = "Tiny 'shelve'-like database with concurrency support"
name = "pickleshare"
optional = false
python-versions = "*"
version = "0.7.5"
[[package]]
category = "dev"
description = "Library for building powerful interactive command lines in Python"
name = "prompt-toolkit"
optional = false
python-versions = ">=3.6"
version = "3.0.3"
[package.dependencies]
wcwidth = "*"
[[package]]
category = "main"
description = "Pure-Python robots.txt parser with support for modern conventions"
@ -340,6 +480,15 @@ version = "0.1.16"
[package.dependencies]
six = "*"
[[package]]
category = "dev"
description = "Run a subprocess in a pseudo terminal"
marker = "sys_platform != \"win32\""
name = "ptyprocess"
optional = false
python-versions = "*"
version = "0.6.0"
[[package]]
category = "main"
description = "Get a public suffix for a domain name using the Public Suffix List. Forked from and using the same API as the publicsuffix package."
@ -383,6 +532,14 @@ optional = false
python-versions = "*"
version = "2.0.5"
[[package]]
category = "dev"
description = "Pygments is a syntax highlighting package written in Python."
name = "pygments"
optional = false
python-versions = ">=3.5"
version = "2.6.1"
[[package]]
category = "main"
description = "Hamcrest framework for matcher objects"
@ -449,6 +606,7 @@ requests = "^2.22.0"
reference = "3ea143f44d37ab701c70ffb38408528ddb4e2b6e"
type = "git"
url = "https://github.com/CIRCL/PySaneJS.git"
[[package]]
category = "main"
description = "Collection of persistent (disk-based) queues"
@ -548,6 +706,7 @@ scrapy-splash = "^0.7.2"
reference = "0bf40df2bfd1c857169300bac21d47311fde1046"
type = "git"
url = "https://github.com/viper-framework/ScrapySplashWrapper.git"
[[package]]
category = "main"
description = "Service identity verification for pyOpenSSL & cryptography."
@ -584,6 +743,22 @@ optional = false
python-versions = ">=3.5"
version = "2.0"
[[package]]
category = "dev"
description = "Traitlets Python config system"
name = "traitlets"
optional = false
python-versions = "*"
version = "4.3.3"
[package.dependencies]
decorator = "*"
ipython-genutils = "*"
six = "*"
[package.extras]
test = ["pytest", "mock"]
[[package]]
category = "main"
description = "An asynchronous networking framework written in Python"
@ -654,6 +829,14 @@ version = "1.21.0"
[package.dependencies]
six = ">=1.4.1"
[[package]]
category = "dev"
description = "Measures number of Terminal column cells of wide-character codes"
name = "wcwidth"
optional = false
python-versions = "*"
version = "0.1.8"
[[package]]
category = "main"
description = "The comprehensive WSGI web application library."
@ -683,10 +866,14 @@ test = ["zope.event"]
testing = ["zope.event", "nose", "coverage"]
[metadata]
content-hash = "8f5e7e8be7e28726125f30d4448d1514570f13de44b2798f0b21dd133a7e2a3b"
content-hash = "2bbaf5f98d9409de1b18511b7cc72632107ed2f17bc6fbb02727e2e67388da7a"
python-versions = "^3.6"
[metadata.files]
appnope = [
{file = "appnope-0.1.0-py2.py3-none-any.whl", hash = "sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0"},
{file = "appnope-0.1.0.tar.gz", hash = "sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71"},
]
attrs = [
{file = "attrs-19.3.0-py2.py3-none-any.whl", hash = "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c"},
{file = "attrs-19.3.0.tar.gz", hash = "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"},
@ -695,6 +882,10 @@ automat = [
{file = "Automat-20.2.0-py2.py3-none-any.whl", hash = "sha256:b6feb6455337df834f6c9962d6ccf771515b7d939bca142b29c20c2376bc6111"},
{file = "Automat-20.2.0.tar.gz", hash = "sha256:7979803c74610e11ef0c0d68a2942b152df52da55336e0c9d58daf1831cbdf33"},
]
backcall = [
{file = "backcall-0.1.0.tar.gz", hash = "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4"},
{file = "backcall-0.1.0.zip", hash = "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"},
]
beautifulsoup4 = [
{file = "beautifulsoup4-4.8.2-py2-none-any.whl", hash = "sha256:e1505eeed31b0f4ce2dbb3bc8eb256c04cc2b3b72af7d551a4ab6efd5cbe5dae"},
{file = "beautifulsoup4-4.8.2-py3-none-any.whl", hash = "sha256:9fbb4d6e48ecd30bcacc5b63b94088192dcda178513b2ae3c394229f8911b887"},
@ -772,8 +963,12 @@ click = [
{file = "click-7.1.1.tar.gz", hash = "sha256:8a18b4ea89d8820c5d0c7da8a64b2c324b4dabb695804dbfea19b9be9d88c0cc"},
]
cloudscraper = [
{file = "cloudscraper-1.2.26-py2.py3-none-any.whl", hash = "sha256:12eebdfab1e76ea82b0ac2bcbe9a53f2d5c7041bb7109d97465f720cecab8f33"},
{file = "cloudscraper-1.2.26.tar.gz", hash = "sha256:309e237370e5e37d9354874311295bcaf640424771f9f21e21cba91d4276365a"},
{file = "cloudscraper-1.2.28-py2.py3-none-any.whl", hash = "sha256:ebeb5f9adcb52ed6b763688b05c34264942116b01f8f0b7a5b7de7c1d505cd67"},
{file = "cloudscraper-1.2.28.tar.gz", hash = "sha256:7a843769aaed50905afeff82c3ebdd1a3a5980bfa5436719f05f0db4af127790"},
]
colorama = [
{file = "colorama-0.4.3-py2.py3-none-any.whl", hash = "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff"},
{file = "colorama-0.4.3.tar.gz", hash = "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1"},
]
constantly = [
{file = "constantly-15.1.0-py2.py3-none-any.whl", hash = "sha256:dd2fa9d6b1a51a83f0d7dd76293d734046aa176e384bf6e33b7e44880eb37c5d"},
@ -806,6 +1001,13 @@ cssselect = [
{file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"},
{file = "cssselect-1.1.0.tar.gz", hash = "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"},
]
decorator = [
{file = "decorator-4.4.2-py2.py3-none-any.whl", hash = "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760"},
{file = "decorator-4.4.2.tar.gz", hash = "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7"},
]
defang = [
{file = "defang-0.5.3.tar.gz", hash = "sha256:86aeff658d7cd4c3b61d16089872e1c1f0a1b7b3c64d4ca9525c017caeb284d7"},
]
ete3 = [
{file = "ete3-3.1.1.tar.gz", hash = "sha256:870a3d4b496a36fbda4b13c7c6b9dfa7638384539ae93551ec7acb377fb9c385"},
]
@ -830,10 +1032,22 @@ incremental = [
{file = "incremental-17.5.0-py2.py3-none-any.whl", hash = "sha256:717e12246dddf231a349175f48d74d93e2897244939173b01974ab6661406b9f"},
{file = "incremental-17.5.0.tar.gz", hash = "sha256:7b751696aaf36eebfab537e458929e194460051ccad279c72b755a167eebd4b3"},
]
ipython = [
{file = "ipython-7.13.0-py3-none-any.whl", hash = "sha256:eb8d075de37f678424527b5ef6ea23f7b80240ca031c2dd6de5879d687a65333"},
{file = "ipython-7.13.0.tar.gz", hash = "sha256:ca478e52ae1f88da0102360e57e528b92f3ae4316aabac80a2cd7f7ab2efb48a"},
]
ipython-genutils = [
{file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"},
{file = "ipython_genutils-0.2.0.tar.gz", hash = "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"},
]
itsdangerous = [
{file = "itsdangerous-1.1.0-py2.py3-none-any.whl", hash = "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749"},
{file = "itsdangerous-1.1.0.tar.gz", hash = "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19"},
]
jedi = [
{file = "jedi-0.16.0-py2.py3-none-any.whl", hash = "sha256:b4f4052551025c6b0b0b193b29a6ff7bdb74c52450631206c262aef9f7159ad2"},
{file = "jedi-0.16.0.tar.gz", hash = "sha256:d5c871cb9360b414f981e7072c52c33258d598305280fef91c6cae34739d65d5"},
]
jinja2 = [
{file = "Jinja2-2.11.1-py2.py3-none-any.whl", hash = "sha256:b0eaf100007721b5c16c1fc1eecb87409464edc10469ddc9a22a27a99123be49"},
{file = "Jinja2-2.11.1.tar.gz", hash = "sha256:93187ffbc7808079673ef52771baa950426fd664d3aad1d0fa3e95644360e250"},
@ -921,9 +1135,29 @@ parsel = [
{file = "parsel-1.5.2-py2.py3-none-any.whl", hash = "sha256:74f8e9d3b345b14cb1416bd777a03982cde33a74d8b32e0c71e651d07d41d40a"},
{file = "parsel-1.5.2.tar.gz", hash = "sha256:4da4262ba4605573b6b72a5f557616a2fc9dee7a47a1efad562752a28d366723"},
]
parso = [
{file = "parso-0.6.2-py2.py3-none-any.whl", hash = "sha256:8515fc12cfca6ee3aa59138741fc5624d62340c97e401c74875769948d4f2995"},
{file = "parso-0.6.2.tar.gz", hash = "sha256:0c5659e0c6eba20636f99a04f469798dca8da279645ce5c387315b2c23912157"},
]
pexpect = [
{file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"},
{file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"},
]
pickleshare = [
{file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"},
{file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"},
]
prompt-toolkit = [
{file = "prompt_toolkit-3.0.3-py3-none-any.whl", hash = "sha256:c93e53af97f630f12f5f62a3274e79527936ed466f038953dfa379d4941f651a"},
{file = "prompt_toolkit-3.0.3.tar.gz", hash = "sha256:a402e9bf468b63314e37460b68ba68243d55b2f8c4d0192f85a019af3945050e"},
]
protego = [
{file = "Protego-0.1.16.tar.gz", hash = "sha256:a682771bc7b51b2ff41466460896c1a5a653f9a1e71639ef365a72e66d8734b4"},
]
ptyprocess = [
{file = "ptyprocess-0.6.0-py2.py3-none-any.whl", hash = "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"},
{file = "ptyprocess-0.6.0.tar.gz", hash = "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0"},
]
publicsuffix2 = [
{file = "publicsuffix2-2.20191221-py2.py3-none-any.whl", hash = "sha256:786b5e36205b88758bd3518725ec8cfe7a8173f5269354641f581c6b80a99893"},
{file = "publicsuffix2-2.20191221.tar.gz", hash = "sha256:00f8cc31aa8d0d5592a5ced19cccba7de428ebca985db26ac852d920ddd6fe7b"},
@ -966,6 +1200,10 @@ pydispatcher = [
{file = "PyDispatcher-2.0.5.tar.gz", hash = "sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf"},
{file = "PyDispatcher-2.0.5.zip", hash = "sha256:5be4a8be12805ef7d712dd9a93284fb8bc53f309867e573f653a72e5fd10e433"},
]
pygments = [
{file = "Pygments-2.6.1-py3-none-any.whl", hash = "sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324"},
{file = "Pygments-2.6.1.tar.gz", hash = "sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44"},
]
pyhamcrest = [
{file = "PyHamcrest-2.0.2-py3-none-any.whl", hash = "sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"},
{file = "PyHamcrest-2.0.2.tar.gz", hash = "sha256:412e00137858f04bde0729913874a48485665f2d36fe9ee449f26be864af9316"},
@ -1016,6 +1254,10 @@ soupsieve = [
{file = "soupsieve-2.0-py2.py3-none-any.whl", hash = "sha256:fcd71e08c0aee99aca1b73f45478549ee7e7fc006d51b37bec9e9def7dc22b69"},
{file = "soupsieve-2.0.tar.gz", hash = "sha256:e914534802d7ffd233242b785229d5ba0766a7f487385e3f714446a07bf540ae"},
]
traitlets = [
{file = "traitlets-4.3.3-py2.py3-none-any.whl", hash = "sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44"},
{file = "traitlets-4.3.3.tar.gz", hash = "sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7"},
]
twisted = [
{file = "Twisted-19.10.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:257dbc78e72bc69c2970035fc74df54b04573d5ddd380251a8a23f74d619db03"},
{file = "Twisted-19.10.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:a1de7598ce977943b3edbcc0a7d2112f134cc1b98b2fd7e348ee9e0bef092e50"},
@ -1073,6 +1315,10 @@ w3lib = [
{file = "w3lib-1.21.0-py2.py3-none-any.whl", hash = "sha256:847704b837b2b973cddef6938325d466628e6078266bc2e1f7ac49ba85c34823"},
{file = "w3lib-1.21.0.tar.gz", hash = "sha256:8b1854fef570b5a5fc84d960e025debd110485d73fd283580376104762774315"},
]
wcwidth = [
{file = "wcwidth-0.1.8-py2.py3-none-any.whl", hash = "sha256:8fd29383f539be45b20bd4df0dc29c20ba48654a41e661925e612311e9f3c603"},
{file = "wcwidth-0.1.8.tar.gz", hash = "sha256:f28b3e8a6483e5d49e7f8949ac1a78314e740333ae305b4ba5defd3e74fb37a8"},
]
werkzeug = [
{file = "Werkzeug-1.0.0-py2.py3-none-any.whl", hash = "sha256:6dc65cf9091cf750012f56f2cad759fa9e879f511b5ff8685e456b4e3bf90d16"},
{file = "Werkzeug-1.0.0.tar.gz", hash = "sha256:169ba8a33788476292d04186ab33b01d6add475033dfc07215e6d219cc077096"},

View File

@ -42,9 +42,11 @@ pylookyloo = {path = "client"}
beautifulsoup4 = "^4.8.2"
bootstrap-flask = "^1.2.0"
cloudscraper = "^1.2.20"
defang = "^0.5.3"
[tool.poetry.dev-dependencies]
mypy = "^0.761"
ipython = "^7.13.0"
[build-system]
requires = ["poetry>=0.12"]