From ae49fd2c11faec234c5004fb1bd4edcb2f3894dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 21 Apr 2020 18:41:57 +0200 Subject: [PATCH] new: Add support to pass more parameters via the API --- client/pylookyloo/__init__.py | 3 ++- client/pylookyloo/api.py | 17 ++++++++++++----- lookyloo/lookyloo.py | 14 +++++++------- poetry.lock | 22 ++-------------------- website/web/__init__.py | 1 - 5 files changed, 23 insertions(+), 34 deletions(-) diff --git a/client/pylookyloo/__init__.py b/client/pylookyloo/__init__.py index 26c5211a..a9515344 100644 --- a/client/pylookyloo/__init__.py +++ b/client/pylookyloo/__init__.py @@ -7,6 +7,7 @@ def main(): parser = argparse.ArgumentParser(description='Enqueue a URL on Lookyloo.', epilog='The response is the permanent URL where you can see the result of the capture.') parser.add_argument('--url', type=str, help='URL of the instance (defaults to https://lookyloo.circl.lu/, the public instance).') parser.add_argument('--query', required=True, help='URL to enqueue.') + parser.add_argument('--listing', default=False, action='store_true', help='Should the report be publicly listed.') args = parser.parse_args() if args.url: @@ -15,7 +16,7 @@ def main(): lookyloo = Lookyloo() if lookyloo.is_up: - url = lookyloo.enqueue(args.query) + url = lookyloo.enqueue(args.query, listing=args.listing) print(url) else: print(f'Unable to reach {lookyloo.root_url}. Is the server up?') diff --git a/client/pylookyloo/api.py b/client/pylookyloo/api.py index 40b4ac93..25d818b9 100644 --- a/client/pylookyloo/api.py +++ b/client/pylookyloo/api.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import requests -import json - +from typing import Optional from urllib.parse import urljoin +import requests + class Lookyloo(): @@ -20,6 +20,13 @@ class Lookyloo(): r = self.session.head(self.root_url) return r.status_code == 200 - def enqueue(self, url: str) -> str: - response = self.session.post(urljoin(self.root_url, 'submit'), data=json.dumps({'url': url})) + def enqueue(self, url: Optional[str]=None, **kwargs) -> str: + if not url and 'url' not in kwargs: + raise Exception(f'url entry required: {kwargs}') + + if url: + to_send = {'url': url, **kwargs} + else: + to_send = kwargs + response = self.session.post(urljoin(self.root_url, 'submit'), json=to_send) return urljoin(self.root_url, f'tree/{response.text}') diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index bca7a0f8..8ff8a61f 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -218,6 +218,10 @@ class Lookyloo(): def enqueue_scrape(self, query: dict) -> str: perma_uuid = str(uuid4()) p = self.redis.pipeline() + for key, value in query.items(): + if isinstance(value, bool): + # Yes, empty string because that's False. + query[key] = 1 if value else '' p.hmset(perma_uuid, query) p.sadd('to_scrape', perma_uuid) p.execute() @@ -261,11 +265,6 @@ class Lookyloo(): except Har2TreeError as e: raise NoValidHarFile(e.message) - def cleanup_old_tmpfiles(self): - for tmpfile in pathlib.Path(tempfile.gettempdir()).glob('lookyloo*'): - if time.time() - tmpfile.stat().st_atime > 36000: - tmpfile.unlink() - def load_image(self, capture_dir: Path) -> BytesIO: with open(list(capture_dir.glob('*.png'))[0], 'rb') as f: return BytesIO(f.read()) @@ -275,8 +274,9 @@ class Lookyloo(): return self.sanejs.sha512(sha512) return {'response': []} - def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None, depth: int=1, listing: bool=True, user_agent: Optional[str]=None, perma_uuid: str=None, - os: str=None, browser: str=None) -> Union[bool, str]: + def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None, + depth: int=1, listing: bool=True, user_agent: Optional[str]=None, + perma_uuid: str=None, os: str=None, browser: str=None) -> Union[bool, str]: url = url.strip() url = refang(url) if not url.startswith('http'): diff --git a/poetry.lock b/poetry.lock index c0b4eb87..e1881ef7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -197,7 +197,7 @@ description = "cryptography is a package which provides cryptographic recipes an name = "cryptography" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" -version = "2.9" +version = "2.9.1" [package.dependencies] cffi = ">=1.8,<1.11.3 || >1.11.3" @@ -1216,25 +1216,7 @@ constantly = [ {file = "constantly-15.1.0.tar.gz", hash = "sha256:586372eb92059873e29eba4f9dec8381541b4d3834660707faf8ba59146dfc35"}, ] cryptography = [ - {file = "cryptography-2.9-cp27-cp27m-macosx_10_9_intel.whl", hash = "sha256:ef9a55013676907df6c9d7dd943eb1770d014f68beaa7e73250fb43c759f4585"}, - {file = "cryptography-2.9-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:2a2ad24d43398d89f92209289f15265107928f22a8d10385f70def7a698d6a02"}, - {file = "cryptography-2.9-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:95e1296e0157361fe2f5f0ed307fd31f94b0ca13372e3673fa95095a627636a1"}, - {file = "cryptography-2.9-cp27-cp27m-win32.whl", hash = "sha256:192ca04a36852a994ef21df13cca4d822adbbdc9d5009c0f96f1d2929e375d4f"}, - {file = "cryptography-2.9-cp27-cp27m-win_amd64.whl", hash = "sha256:ed1d0760c7e46436ec90834d6f10477ff09475c692ed1695329d324b2c5cd547"}, - {file = "cryptography-2.9-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:19ae795137682a9778892fb4390c07811828b173741bce91e30f899424b3934d"}, - {file = "cryptography-2.9-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d1bf5a1a0d60c7f9a78e448adcb99aa101f3f9588b16708044638881be15d6bc"}, - {file = "cryptography-2.9-cp35-abi3-macosx_10_9_intel.whl", hash = "sha256:1b9b535d6b55936a79dbe4990b64bb16048f48747c76c29713fea8c50eca2acf"}, - {file = "cryptography-2.9-cp35-abi3-manylinux1_x86_64.whl", hash = "sha256:7a279f33a081d436e90e91d1a7c338553c04e464de1c9302311a5e7e4b746088"}, - {file = "cryptography-2.9-cp35-abi3-manylinux2010_x86_64.whl", hash = "sha256:49870684da168b90110bbaf86140d4681032c5e6a2461adc7afdd93be5634216"}, - {file = "cryptography-2.9-cp35-cp35m-win32.whl", hash = "sha256:6b91cab3841b4c7cb70e4db1697c69f036c8bc0a253edc0baa6783154f1301e4"}, - {file = "cryptography-2.9-cp35-cp35m-win_amd64.whl", hash = "sha256:587f98ce27ac4547177a0c6fe0986b8736058daffe9160dcf5f1bd411b7fbaa1"}, - {file = "cryptography-2.9-cp36-cp36m-win32.whl", hash = "sha256:cc20316e3f5a6b582fc3b029d8dc03aabeb645acfcb7fc1d9848841a33265748"}, - {file = "cryptography-2.9-cp36-cp36m-win_amd64.whl", hash = "sha256:3be7a5722d5bfe69894d3f7bbed15547b17619f3a88a318aab2e37f457524164"}, - {file = "cryptography-2.9-cp37-cp37m-win32.whl", hash = "sha256:7598974f6879a338c785c513e7c5a4329fbc58b9f6b9a6305035fca5b1076552"}, - {file = "cryptography-2.9-cp37-cp37m-win_amd64.whl", hash = "sha256:5aca6f00b2f42546b9bdf11a69f248d1881212ce5b9e2618b04935b87f6f82a1"}, - {file = "cryptography-2.9-cp38-cp38-win32.whl", hash = "sha256:9fc9da390e98cb6975eadf251b6e5fa088820141061bf041cd5c72deba1dc526"}, - {file = "cryptography-2.9-cp38-cp38-win_amd64.whl", hash = "sha256:6b744039b55988519cc183149cceb573189b3e46e16ccf6f8c46798bb767c9dc"}, - {file = "cryptography-2.9.tar.gz", hash = "sha256:0cacd3ef5c604b8e5f59bf2582c076c98a37fe206b31430d0cd08138aff0986e"}, + {file = "cryptography-2.9.1.tar.gz", hash = "sha256:ce0bd68b4b946bd4bcebc3d4d1325bf0e938e445ae18cedddd60e33dd85a368e"}, ] cssselect = [ {file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"}, diff --git a/website/web/__init__.py b/website/web/__init__.py index b0506ede..a12a084e 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -260,7 +260,6 @@ def index(): if request.method == 'HEAD': # Just returns ack if the webserver is running return 'Ack' - lookyloo.cleanup_old_tmpfiles() update_user_agents() titles = [] for capture_dir in lookyloo.capture_dirs: