diff --git a/client/README.md b/client/README.md deleted file mode 100644 index bf12d8f8..00000000 --- a/client/README.md +++ /dev/null @@ -1,68 +0,0 @@ -# PyLookyloo - -This is the client API for [Lookyloo](https://github.com/Lookyloo/lookyloo). - -## Installation - -```bash -pip install pylookyloo -``` - -## Usage - -* You can use the lookyloo command to enqueue an URL. - -```bash -usage: lookyloo [-h] [--url URL] --query QUERY - -Enqueue a URL on Lookyloo. - -optional arguments: - -h, --help show this help message and exit - --url URL URL of the instance (defaults to https://lookyloo.circl.lu/, - the public instance). - --query QUERY URL to enqueue. - --listing Should the report be publicly listed. - --redirects Get redirects for a given capture. - -The response is the permanent URL where you can see the result of the capture. -``` - -* Or as a library - -```python - -from pylookyloo import Lookyloo - -lookyloo = Lookyloo('https://url.of.lookyloo.instance') -if lookyloo.is_up: # to make sure it is up and reachable - permaurl = lookyloo.enqueue('http://url.to.lookup') - -``` -You can add the following paramaters to the enqueue fuction: -``` - quiet Return only the uuid - listing Should the report be publicly listed. - user_agent Set your own user agent - Depth Set the analysis depth. Can not be more than in config -``` -To retrieve the redirects (json) -```python - redirect = lookyloo.get_redirects(uuid) -``` -To retrieve the cookies (json) -```python - cookies = lookyloo.get_cookies(uuid) -``` -To retrieve the screenshot (raw) -```python - screen = lookyloo.get_screenshot(uuid) -``` -To retrieve the html (raw) -```python - html = lookyloo.get_html(uuid) -``` -To retrieve the complete capture(raw) -```python - capture = lookyloo.get_complete_capture(uuid) -``` diff --git a/client/poetry.lock b/client/poetry.lock deleted file mode 100644 index fddccbf7..00000000 --- a/client/poetry.lock +++ /dev/null @@ -1,80 +0,0 @@ -[[package]] -category = "main" -description = "Python package for providing Mozilla's CA Bundle." -name = "certifi" -optional = false -python-versions = "*" -version = "2020.6.20" - -[[package]] -category = "main" -description = "Universal encoding detector for Python 2 and 3" -name = "chardet" -optional = false -python-versions = "*" -version = "3.0.4" - -[[package]] -category = "main" -description = "Internationalized Domain Names in Applications (IDNA)" -name = "idna" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "2.10" - -[[package]] -category = "main" -description = "Python HTTP for Humans." -name = "requests" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "2.24.0" - -[package.dependencies] -certifi = ">=2017.4.17" -chardet = ">=3.0.2,<4" -idna = ">=2.5,<3" -urllib3 = ">=1.21.1,<1.25.0 || >1.25.0,<1.25.1 || >1.25.1,<1.26" - -[package.extras] -security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] -socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7)", "win-inet-pton"] - -[[package]] -category = "main" -description = "HTTP library with thread-safe connection pooling, file post, and more." -name = "urllib3" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" -version = "1.25.10" - -[package.extras] -brotli = ["brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "pyOpenSSL (>=0.14)", "ipaddress"] -socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"] - -[metadata] -content-hash = "4d17f9e80f90dd84c09b8f46829319f9ba04c4a24359b783035c9f15e84a8115" -python-versions = "^3.6" - -[metadata.files] -certifi = [ - {file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"}, - {file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"}, -] -chardet = [ - {file = "chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"}, - {file = "chardet-3.0.4.tar.gz", hash = "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"}, -] -idna = [ - {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, - {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, -] -requests = [ - {file = "requests-2.24.0-py2.py3-none-any.whl", hash = "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898"}, - {file = "requests-2.24.0.tar.gz", hash = "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b"}, -] -urllib3 = [ - {file = "urllib3-1.25.10-py2.py3-none-any.whl", hash = "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461"}, - {file = "urllib3-1.25.10.tar.gz", hash = "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a"}, -] diff --git a/client/pylookyloo/__init__.py b/client/pylookyloo/__init__.py deleted file mode 100644 index ef455324..00000000 --- a/client/pylookyloo/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -import argparse -import json - -from .api import Lookyloo - - -def main(): - parser = argparse.ArgumentParser(description='Enqueue a URL on Lookyloo.', epilog='The response is the permanent URL where you can see the result of the capture.') - parser.add_argument('--url', type=str, help='URL of the instance (defaults to https://lookyloo.circl.lu/, the public instance).') - parser.add_argument('--query', help='URL to enqueue.') - parser.add_argument('--listing', default=False, action='store_true', help='Should the report be publicly listed.') - parser.add_argument('--redirects', help='Get redirects for a given capture.') - args = parser.parse_args() - - if args.url: - lookyloo = Lookyloo(args.url) - else: - lookyloo = Lookyloo() - - if lookyloo.is_up: - if args.query: - url = lookyloo.enqueue(args.query, listing=args.listing) - print(url) - else: - response = lookyloo.get_redirects(args.redirects) - print(json.dumps(response)) - else: - print(f'Unable to reach {lookyloo.root_url}. Is the server up?') diff --git a/client/pylookyloo/api.py b/client/pylookyloo/api.py deleted file mode 100644 index 11238a2d..00000000 --- a/client/pylookyloo/api.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -from io import BytesIO, StringIO -from typing import Optional, Dict, Any, List -from urllib.parse import urljoin -from pathlib import Path - -import requests - - -class Lookyloo(): - - def __init__(self, root_url: str='https://lookyloo.circl.lu/'): - self.root_url = root_url - if not self.root_url.endswith('/'): - self.root_url += '/' - self.session = requests.session() - - @property - def is_up(self) -> bool: - r = self.session.head(self.root_url) - return r.status_code == 200 - - def enqueue(self, url: Optional[str]=None, quiet: bool=False, **kwargs) -> str: - '''Enqueue an URL. - :param url: URL to enqueue - :param quiet: Returns the UUID only, instead of the whole URL - :param kwargs: accepts all the parameters supported by `Lookyloo.scrape` - ''' - if not url and 'url' not in kwargs: - raise Exception(f'url entry required: {kwargs}') - - if url: - to_send = {'url': url, **kwargs} - else: - to_send = kwargs - response = self.session.post(urljoin(self.root_url, 'submit'), json=to_send) - if quiet: - return response.text - else: - return urljoin(self.root_url, f'tree/{response.text}') - - def get_redirects(self, capture_uuid: str) -> Dict[str, Any]: - r = self.session.get(urljoin(self.root_url, str(Path('json', capture_uuid, 'redirects')))) - return r.json() - - def get_screenshot(self, capture_uuid: str) -> BytesIO: - r = self.session.get(urljoin(self.root_url, str(Path('tree', capture_uuid, 'image')))) - return BytesIO(r.content) - - def get_cookies(self, capture_uuid: str) -> List[Dict[str, str]]: - r = self.session.get(urljoin(self.root_url, str(Path('tree', capture_uuid, 'cookies')))) - return r.json() - - def get_html(self, capture_uuid: str) -> StringIO: - r = self.session.get(urljoin(self.root_url, str(Path('tree', capture_uuid, 'html')))) - return StringIO(r.text) - - def get_complete_capture(self, capture_uuid: str) -> BytesIO: - r = self.session.get(urljoin(self.root_url, str(Path('tree', capture_uuid, 'export')))) - return BytesIO(r.content) diff --git a/client/pylookyloo/py.typed b/client/pylookyloo/py.typed deleted file mode 100644 index e69de29b..00000000 diff --git a/client/pyproject.toml b/client/pyproject.toml deleted file mode 100644 index 0204dc1c..00000000 --- a/client/pyproject.toml +++ /dev/null @@ -1,39 +0,0 @@ -[tool.poetry] -name = "pylookyloo" -version = "1.2" -description = "Python client for Lookyloo" -authors = ["Raphaël Vinot "] -license = "AGPL-3.0-or-later" -repository = "https://github.com/CIRCL/lookyloo/client" - -readme = "README.md" - -classifiers = [ - 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)', - 'Development Status :: 5 - Production/Stable', - 'Environment :: Console', - 'Operating System :: POSIX :: Linux', - 'Intended Audience :: Science/Research', - 'Intended Audience :: Telecommunications Industry', - 'Intended Audience :: Information Technology', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Topic :: Security', - 'Topic :: Internet', -] - -include = ['README.md'] - -[tool.poetry.scripts] -lookyloo = 'pylookyloo:main' - -[tool.poetry.dependencies] -python = "^3.6" -requests = "^2.22.0" - -[tool.poetry.dev-dependencies] - -[build-system] -requires = ["poetry>=0.12"] -build-backend = "poetry.masonry.api" diff --git a/client/setup.py b/client/setup.py deleted file mode 100644 index 0a2c7d64..00000000 --- a/client/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup - - -setup( - name='pylookyloo', - version='1.0-dev', - author='Raphaël Vinot', - author_email='raphael.vinot@circl.lu', - maintainer='Raphaël Vinot', - url='https://github.com/Lookyloo/lookyloo/client', - description='Python client for Lookyloo', - packages=['pylookyloo'], - entry_points={"console_scripts": ["lookyloo = pylookyloo:main"]}, - install_requires=['requests'], - classifiers=[ - 'License :: OSI Approved :: BSD License', - 'Development Status :: 5 - Production/Stable', - 'Environment :: Console', - 'Operating System :: POSIX :: Linux', - 'Intended Audience :: Science/Research', - 'Intended Audience :: Telecommunications Industry', - 'Intended Audience :: Information Technology', - 'Programming Language :: Python :: 3', - 'Topic :: Security', - 'Topic :: Internet', - ] -) diff --git a/poetry.lock b/poetry.lock index 45298a91..f0b4fbaa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -56,12 +56,13 @@ description = "Classes Without Boilerplate" name = "attrs" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "20.1.0" +version = "20.2.0" [package.extras] dev = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "sphinx", "sphinx-rtd-theme", "pre-commit"] docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] tests = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"] +tests_no_zope = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six"] [[package]] category = "main" @@ -702,20 +703,14 @@ version = "2.0.2" [[package]] category = "main" -description = "" -develop = true +description = "Python client for Lookyloo" name = "pylookyloo" optional = false -python-versions = "^3.6" +python-versions = ">=3.6,<4.0" version = "1.2" [package.dependencies] -requests = "^2.22.0" - -[package.source] -reference = "" -type = "directory" -url = "client" +requests = ">=2.22.0,<3.0.0" [[package]] category = "main" @@ -948,7 +943,7 @@ description = "Traitlets Python configuration system" name = "traitlets" optional = false python-versions = ">=3.7" -version = "5.0.3" +version = "5.0.4" [package.dependencies] ipython-genutils = "*" @@ -1106,7 +1101,7 @@ test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] -content-hash = "e274bd7d88d0ae1b2858920fdec1a918f8240120206b960e9f14e6cdfa62d42b" +content-hash = "d2999e4ac30e58f07cc42a9a21ebd32f0d2dc6e4e2466f30356d87e7c8ce066e" lock-version = "1.0" python-versions = "^3.7" @@ -1144,8 +1139,8 @@ atomicwrites = [ {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, ] attrs = [ - {file = "attrs-20.1.0-py2.py3-none-any.whl", hash = "sha256:2867b7b9f8326499ab5b0e2d12801fa5c98842d2cbd22b35112ae04bf85b4dff"}, - {file = "attrs-20.1.0.tar.gz", hash = "sha256:0ef97238856430dcf9228e07f316aefc17e8939fc8507e18c6501b761ef1a42a"}, + {file = "attrs-20.2.0-py2.py3-none-any.whl", hash = "sha256:fce7fc47dfc976152e82d53ff92fa0407700c21acd20886a13777a0d20e655dc"}, + {file = "attrs-20.2.0.tar.gz", hash = "sha256:26b54ddbbb9ee1d34d5d3668dd37d6cf74990ab23c828c2888dccdceee395594"}, ] automat = [ {file = "Automat-20.2.0-py2.py3-none-any.whl", hash = "sha256:b6feb6455337df834f6c9962d6ccf771515b7d939bca142b29c20c2376bc6111"}, @@ -1570,7 +1565,10 @@ pyhamcrest = [ {file = "PyHamcrest-2.0.2-py3-none-any.whl", hash = "sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"}, {file = "PyHamcrest-2.0.2.tar.gz", hash = "sha256:412e00137858f04bde0729913874a48485665f2d36fe9ee449f26be864af9316"}, ] -pylookyloo = [] +pylookyloo = [ + {file = "pylookyloo-1.2-py3-none-any.whl", hash = "sha256:d5a3b43f5180d1890fbf709c4a98b73219e523da0eac4f45d69c2a31800aca37"}, + {file = "pylookyloo-1.2.tar.gz", hash = "sha256:4c880b6f8cea9d0043dee6676ac239df73309c8721fa40302520d6c0788f5224"}, +] pyopenssl = [ {file = "pyOpenSSL-19.1.0-py2.py3-none-any.whl", hash = "sha256:621880965a720b8ece2f1b2f54ea2071966ab00e2970ad2ce11d596102063504"}, {file = "pyOpenSSL-19.1.0.tar.gz", hash = "sha256:9a24494b2602aaf402be5c9e30a0b82d4a5c67528fe8fb475e3f3bc00dd69507"}, @@ -1639,8 +1637,8 @@ toml = [ {file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"}, ] traitlets = [ - {file = "traitlets-5.0.3-py3-none-any.whl", hash = "sha256:8bdadb17a04c844f444cdefaa3dee47a12ff14cf6277b9eeda29bfa0659d5987"}, - {file = "traitlets-5.0.3.tar.gz", hash = "sha256:a2e91709a0330b6c5d497ed470b2feb1ed8da5c9dd807c6daab41f727b9391c9"}, + {file = "traitlets-5.0.4-py3-none-any.whl", hash = "sha256:9664ec0c526e48e7b47b7d14cd6b252efa03e0129011de0a9c1d70315d4309c3"}, + {file = "traitlets-5.0.4.tar.gz", hash = "sha256:86c9351f94f95de9db8a04ad8e892da299a088a64fd283f9f6f18770ae5eae1b"}, ] twisted = [ {file = "Twisted-20.3.0-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:cdbc4c7f0cd7a2218b575844e970f05a1be1861c607b0e048c9bceca0c4d42f7"}, diff --git a/pyproject.toml b/pyproject.toml index b2958592..8d242b1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,6 @@ flask = "^1.1.1" gunicorn = {version = "^20.0.4"} cchardet = "^2.1.5" redis = "^3.3.11" -pylookyloo = {path = "client"} beautifulsoup4 = "^4.8.2" bootstrap-flask = "^1.2.0" cloudscraper = "^1.2.20" @@ -47,6 +46,7 @@ pyeupi = "^1.0" scrapysplashwrapper = "^1.2" pysanejs = "^1.1" har2tree = "^1.2" +pylookyloo = "^1.2" [tool.poetry.dev-dependencies] mypy = "^0.761"