chg: move pylookyloo to a dedicated repository

pull/86/head
Raphaël Vinot 2020-09-08 13:48:10 +02:00
parent fbeb7a564f
commit 2595a8891c
9 changed files with 16 additions and 323 deletions

View File

@ -1,68 +0,0 @@
# PyLookyloo
This is the client API for [Lookyloo](https://github.com/Lookyloo/lookyloo).
## Installation
```bash
pip install pylookyloo
```
## Usage
* You can use the `lookyloo` command to enqueue a URL.
```bash
usage: lookyloo [-h] [--url URL] --query QUERY
Enqueue a URL on Lookyloo.
optional arguments:
-h, --help show this help message and exit
--url URL URL of the instance (defaults to https://lookyloo.circl.lu/,
the public instance).
--query QUERY URL to enqueue.
--listing Should the report be publicly listed.
--redirects Get redirects for a given capture.
The response is the permanent URL where you can see the result of the capture.
```
* Or as a library
```python
from pylookyloo import Lookyloo
lookyloo = Lookyloo('https://url.of.lookyloo.instance')
if lookyloo.is_up:  # to make sure it is up and reachable
    permaurl = lookyloo.enqueue('http://url.to.lookup')
```
You can add the following parameters to the enqueue function:
```
quiet Return only the uuid
listing Should the report be publicly listed.
user_agent Set your own user agent
Depth Set the analysis depth. Cannot exceed the maximum depth set in the instance's config
```
To retrieve the redirects (json)
```python
redirect = lookyloo.get_redirects(uuid)
```
To retrieve the cookies (json)
```python
cookies = lookyloo.get_cookies(uuid)
```
To retrieve the screenshot (raw)
```python
screen = lookyloo.get_screenshot(uuid)
```
To retrieve the html (raw)
```python
html = lookyloo.get_html(uuid)
```
To retrieve the complete capture (raw)
```python
capture = lookyloo.get_complete_capture(uuid)
```

80
client/poetry.lock generated
View File

@ -1,80 +0,0 @@
[[package]]
category = "main"
description = "Python package for providing Mozilla's CA Bundle."
name = "certifi"
optional = false
python-versions = "*"
version = "2020.6.20"
[[package]]
category = "main"
description = "Universal encoding detector for Python 2 and 3"
name = "chardet"
optional = false
python-versions = "*"
version = "3.0.4"
[[package]]
category = "main"
description = "Internationalized Domain Names in Applications (IDNA)"
name = "idna"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "2.10"
[[package]]
category = "main"
description = "Python HTTP for Humans."
name = "requests"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
version = "2.24.0"
[package.dependencies]
certifi = ">=2017.4.17"
chardet = ">=3.0.2,<4"
idna = ">=2.5,<3"
urllib3 = ">=1.21.1,<1.25.0 || >1.25.0,<1.25.1 || >1.25.1,<1.26"
[package.extras]
security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"]
socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7)", "win-inet-pton"]
[[package]]
category = "main"
description = "HTTP library with thread-safe connection pooling, file post, and more."
name = "urllib3"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
version = "1.25.10"
[package.extras]
brotli = ["brotlipy (>=0.6.0)"]
secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "pyOpenSSL (>=0.14)", "ipaddress"]
socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"]
[metadata]
content-hash = "4d17f9e80f90dd84c09b8f46829319f9ba04c4a24359b783035c9f15e84a8115"
python-versions = "^3.6"
[metadata.files]
certifi = [
{file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"},
{file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"},
]
chardet = [
{file = "chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"},
{file = "chardet-3.0.4.tar.gz", hash = "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"},
]
idna = [
{file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"},
{file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"},
]
requests = [
{file = "requests-2.24.0-py2.py3-none-any.whl", hash = "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898"},
{file = "requests-2.24.0.tar.gz", hash = "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b"},
]
urllib3 = [
{file = "urllib3-1.25.10-py2.py3-none-any.whl", hash = "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461"},
{file = "urllib3-1.25.10.tar.gz", hash = "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a"},
]

View File

@ -1,28 +0,0 @@
import argparse
import json
from .api import Lookyloo
def main():
    """Command-line entry point: enqueue a URL or fetch the redirects of a capture.

    With ``--query`` the URL is submitted to the Lookyloo instance and the
    value returned by ``Lookyloo.enqueue`` is printed. Otherwise ``--redirects``
    must carry a capture UUID, and the JSON returned by
    ``Lookyloo.get_redirects`` is printed.

    Exits through ``parser.error`` (usage message, exit status 2) when neither
    ``--query`` nor ``--redirects`` is supplied.
    """
    parser = argparse.ArgumentParser(description='Enqueue a URL on Lookyloo.', epilog='The response is the permanent URL where you can see the result of the capture.')
    parser.add_argument('--url', type=str, help='URL of the instance (defaults to https://lookyloo.circl.lu/, the public instance).')
    parser.add_argument('--query', help='URL to enqueue.')
    parser.add_argument('--listing', default=False, action='store_true', help='Should the report be publicly listed.')
    parser.add_argument('--redirects', help='Get redirects for a given capture.')
    args = parser.parse_args()

    # Without this guard the redirects branch would be taken with a None
    # UUID and fail with a TypeError while building the request URL.
    if not args.query and not args.redirects:
        parser.error('one of --query or --redirects is required.')

    if args.url:
        lookyloo = Lookyloo(args.url)
    else:
        # Fall back to the default instance configured in the client class.
        lookyloo = Lookyloo()

    if not lookyloo.is_up:
        print(f'Unable to reach {lookyloo.root_url}. Is the server up?')
        return

    if args.query:
        url = lookyloo.enqueue(args.query, listing=args.listing)
        print(url)
    else:
        response = lookyloo.get_redirects(args.redirects)
        print(json.dumps(response))

View File

@ -1,62 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from io import BytesIO, StringIO
from typing import Optional, Dict, Any, List
from urllib.parse import urljoin
from pathlib import Path
import requests
class Lookyloo():
    """Minimal HTTP client for a Lookyloo instance.

    All requests go through a single ``requests`` session and are addressed
    relative to ``root_url``.
    """

    def __init__(self, root_url: str='https://lookyloo.circl.lu/'):
        """Store the instance URL (forcing a trailing slash) and open a session.

        :param root_url: base URL of the Lookyloo instance
        """
        self.root_url = root_url
        # urljoin() replaces the last path segment unless the base ends with '/'.
        if not self.root_url.endswith('/'):
            self.root_url += '/'
        self.session = requests.session()

    def _capture_url(self, *parts: str) -> str:
        """Join *parts* with '/' and resolve the result against ``root_url``.

        URL paths always use forward slashes, so the segments are joined
        explicitly instead of going through an OS-dependent filesystem path.
        """
        return urljoin(self.root_url, '/'.join(parts))

    @property
    def is_up(self) -> bool:
        """Return True when a HEAD request on ``root_url`` answers with status 200.

        A connection failure is reported as False instead of raising, so
        callers can use this property as a plain reachability check.
        """
        try:
            r = self.session.head(self.root_url)
        except requests.exceptions.ConnectionError:
            return False
        return r.status_code == 200

    def enqueue(self, url: Optional[str]=None, quiet: bool=False, **kwargs) -> str:
        '''Enqueue an URL.

        :param url: URL to enqueue
        :param quiet: Returns the UUID only, instead of the whole URL
        :param kwargs: accepts all the parameters supported by `Lookyloo.scrape`
        :raises Exception: if no URL is given, neither positionally nor in kwargs
        '''
        if not url and 'url' not in kwargs:
            raise Exception(f'url entry required: {kwargs}')

        if url:
            to_send = {'url': url, **kwargs}
        else:
            to_send = kwargs
        response = self.session.post(urljoin(self.root_url, 'submit'), json=to_send)
        if quiet:
            # The response body is returned as-is.
            return response.text
        else:
            return urljoin(self.root_url, f'tree/{response.text}')

    def get_redirects(self, capture_uuid: str) -> Dict[str, Any]:
        """Return the decoded JSON served at ``json/<capture_uuid>/redirects``."""
        r = self.session.get(self._capture_url('json', capture_uuid, 'redirects'))
        return r.json()

    def get_screenshot(self, capture_uuid: str) -> BytesIO:
        """Return the raw body of ``tree/<capture_uuid>/image`` in a BytesIO."""
        r = self.session.get(self._capture_url('tree', capture_uuid, 'image'))
        return BytesIO(r.content)

    def get_cookies(self, capture_uuid: str) -> List[Dict[str, str]]:
        """Return the decoded JSON served at ``tree/<capture_uuid>/cookies``."""
        r = self.session.get(self._capture_url('tree', capture_uuid, 'cookies'))
        return r.json()

    def get_html(self, capture_uuid: str) -> StringIO:
        """Return the text body of ``tree/<capture_uuid>/html`` in a StringIO."""
        r = self.session.get(self._capture_url('tree', capture_uuid, 'html'))
        return StringIO(r.text)

    def get_complete_capture(self, capture_uuid: str) -> BytesIO:
        """Return the raw body of ``tree/<capture_uuid>/export`` in a BytesIO."""
        r = self.session.get(self._capture_url('tree', capture_uuid, 'export'))
        return BytesIO(r.content)

View File

@ -1,39 +0,0 @@
[tool.poetry]
name = "pylookyloo"
version = "1.2"
description = "Python client for Lookyloo"
authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
license = "AGPL-3.0-or-later"
repository = "https://github.com/CIRCL/lookyloo/client"
readme = "README.md"
classifiers = [
'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'Operating System :: POSIX :: Linux',
'Intended Audience :: Science/Research',
'Intended Audience :: Telecommunications Industry',
'Intended Audience :: Information Technology',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Topic :: Security',
'Topic :: Internet',
]
include = ['README.md']
[tool.poetry.scripts]
lookyloo = 'pylookyloo:main'
[tool.poetry.dependencies]
python = "^3.6"
requests = "^2.22.0"
[tool.poetry.dev-dependencies]
[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"

View File

@ -1,28 +0,0 @@
# -*- coding: utf-8 -*-
from setuptools import setup
# setuptools packaging metadata for the pylookyloo client package.
# NOTE(review): the version ('1.0-dev') and the BSD license classifier below
# differ from the values declared in the client's pyproject.toml
# (version 1.2, AGPL-3.0-or-later) — confirm which one is authoritative.
setup(
name='pylookyloo',
version='1.0-dev',
author='Raphaël Vinot',
author_email='raphael.vinot@circl.lu',
maintainer='Raphaël Vinot',
url='https://github.com/Lookyloo/lookyloo/client',
description='Python client for Lookyloo',
packages=['pylookyloo'],
# Installs a `lookyloo` console command that calls pylookyloo.main.
entry_points={"console_scripts": ["lookyloo = pylookyloo:main"]},
install_requires=['requests'],
classifiers=[
'License :: OSI Approved :: BSD License',
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'Operating System :: POSIX :: Linux',
'Intended Audience :: Science/Research',
'Intended Audience :: Telecommunications Industry',
'Intended Audience :: Information Technology',
'Programming Language :: Python :: 3',
'Topic :: Security',
'Topic :: Internet',
]
)

32
poetry.lock generated
View File

@ -56,12 +56,13 @@ description = "Classes Without Boilerplate"
name = "attrs" name = "attrs"
optional = false optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "20.1.0" version = "20.2.0"
[package.extras] [package.extras]
dev = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "sphinx", "sphinx-rtd-theme", "pre-commit"] dev = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "sphinx", "sphinx-rtd-theme", "pre-commit"]
docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"]
tests = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"] tests = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"]
tests_no_zope = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six"]
[[package]] [[package]]
category = "main" category = "main"
@ -702,20 +703,14 @@ version = "2.0.2"
[[package]] [[package]]
category = "main" category = "main"
description = "" description = "Python client for Lookyloo"
develop = true
name = "pylookyloo" name = "pylookyloo"
optional = false optional = false
python-versions = "^3.6" python-versions = ">=3.6,<4.0"
version = "1.2" version = "1.2"
[package.dependencies] [package.dependencies]
requests = "^2.22.0" requests = ">=2.22.0,<3.0.0"
[package.source]
reference = ""
type = "directory"
url = "client"
[[package]] [[package]]
category = "main" category = "main"
@ -948,7 +943,7 @@ description = "Traitlets Python configuration system"
name = "traitlets" name = "traitlets"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
version = "5.0.3" version = "5.0.4"
[package.dependencies] [package.dependencies]
ipython-genutils = "*" ipython-genutils = "*"
@ -1106,7 +1101,7 @@ test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata] [metadata]
content-hash = "e274bd7d88d0ae1b2858920fdec1a918f8240120206b960e9f14e6cdfa62d42b" content-hash = "d2999e4ac30e58f07cc42a9a21ebd32f0d2dc6e4e2466f30356d87e7c8ce066e"
lock-version = "1.0" lock-version = "1.0"
python-versions = "^3.7" python-versions = "^3.7"
@ -1144,8 +1139,8 @@ atomicwrites = [
{file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"},
] ]
attrs = [ attrs = [
{file = "attrs-20.1.0-py2.py3-none-any.whl", hash = "sha256:2867b7b9f8326499ab5b0e2d12801fa5c98842d2cbd22b35112ae04bf85b4dff"}, {file = "attrs-20.2.0-py2.py3-none-any.whl", hash = "sha256:fce7fc47dfc976152e82d53ff92fa0407700c21acd20886a13777a0d20e655dc"},
{file = "attrs-20.1.0.tar.gz", hash = "sha256:0ef97238856430dcf9228e07f316aefc17e8939fc8507e18c6501b761ef1a42a"}, {file = "attrs-20.2.0.tar.gz", hash = "sha256:26b54ddbbb9ee1d34d5d3668dd37d6cf74990ab23c828c2888dccdceee395594"},
] ]
automat = [ automat = [
{file = "Automat-20.2.0-py2.py3-none-any.whl", hash = "sha256:b6feb6455337df834f6c9962d6ccf771515b7d939bca142b29c20c2376bc6111"}, {file = "Automat-20.2.0-py2.py3-none-any.whl", hash = "sha256:b6feb6455337df834f6c9962d6ccf771515b7d939bca142b29c20c2376bc6111"},
@ -1570,7 +1565,10 @@ pyhamcrest = [
{file = "PyHamcrest-2.0.2-py3-none-any.whl", hash = "sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"}, {file = "PyHamcrest-2.0.2-py3-none-any.whl", hash = "sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"},
{file = "PyHamcrest-2.0.2.tar.gz", hash = "sha256:412e00137858f04bde0729913874a48485665f2d36fe9ee449f26be864af9316"}, {file = "PyHamcrest-2.0.2.tar.gz", hash = "sha256:412e00137858f04bde0729913874a48485665f2d36fe9ee449f26be864af9316"},
] ]
pylookyloo = [] pylookyloo = [
{file = "pylookyloo-1.2-py3-none-any.whl", hash = "sha256:d5a3b43f5180d1890fbf709c4a98b73219e523da0eac4f45d69c2a31800aca37"},
{file = "pylookyloo-1.2.tar.gz", hash = "sha256:4c880b6f8cea9d0043dee6676ac239df73309c8721fa40302520d6c0788f5224"},
]
pyopenssl = [ pyopenssl = [
{file = "pyOpenSSL-19.1.0-py2.py3-none-any.whl", hash = "sha256:621880965a720b8ece2f1b2f54ea2071966ab00e2970ad2ce11d596102063504"}, {file = "pyOpenSSL-19.1.0-py2.py3-none-any.whl", hash = "sha256:621880965a720b8ece2f1b2f54ea2071966ab00e2970ad2ce11d596102063504"},
{file = "pyOpenSSL-19.1.0.tar.gz", hash = "sha256:9a24494b2602aaf402be5c9e30a0b82d4a5c67528fe8fb475e3f3bc00dd69507"}, {file = "pyOpenSSL-19.1.0.tar.gz", hash = "sha256:9a24494b2602aaf402be5c9e30a0b82d4a5c67528fe8fb475e3f3bc00dd69507"},
@ -1639,8 +1637,8 @@ toml = [
{file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"}, {file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"},
] ]
traitlets = [ traitlets = [
{file = "traitlets-5.0.3-py3-none-any.whl", hash = "sha256:8bdadb17a04c844f444cdefaa3dee47a12ff14cf6277b9eeda29bfa0659d5987"}, {file = "traitlets-5.0.4-py3-none-any.whl", hash = "sha256:9664ec0c526e48e7b47b7d14cd6b252efa03e0129011de0a9c1d70315d4309c3"},
{file = "traitlets-5.0.3.tar.gz", hash = "sha256:a2e91709a0330b6c5d497ed470b2feb1ed8da5c9dd807c6daab41f727b9391c9"}, {file = "traitlets-5.0.4.tar.gz", hash = "sha256:86c9351f94f95de9db8a04ad8e892da299a088a64fd283f9f6f18770ae5eae1b"},
] ]
twisted = [ twisted = [
{file = "Twisted-20.3.0-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:cdbc4c7f0cd7a2218b575844e970f05a1be1861c607b0e048c9bceca0c4d42f7"}, {file = "Twisted-20.3.0-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:cdbc4c7f0cd7a2218b575844e970f05a1be1861c607b0e048c9bceca0c4d42f7"},

View File

@ -36,7 +36,6 @@ flask = "^1.1.1"
gunicorn = {version = "^20.0.4"} gunicorn = {version = "^20.0.4"}
cchardet = "^2.1.5" cchardet = "^2.1.5"
redis = "^3.3.11" redis = "^3.3.11"
pylookyloo = {path = "client"}
beautifulsoup4 = "^4.8.2" beautifulsoup4 = "^4.8.2"
bootstrap-flask = "^1.2.0" bootstrap-flask = "^1.2.0"
cloudscraper = "^1.2.20" cloudscraper = "^1.2.20"
@ -47,6 +46,7 @@ pyeupi = "^1.0"
scrapysplashwrapper = "^1.2" scrapysplashwrapper = "^1.2"
pysanejs = "^1.1" pysanejs = "^1.1"
har2tree = "^1.2" har2tree = "^1.2"
pylookyloo = "^1.2"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
mypy = "^0.761" mypy = "^0.761"