mirror of https://github.com/CIRCL/lookyloo
new: Add support to pass more parameters via the API
parent
b387e989a5
commit
ae49fd2c11
|
@ -7,6 +7,7 @@ def main():
|
||||||
parser = argparse.ArgumentParser(description='Enqueue a URL on Lookyloo.', epilog='The response is the permanent URL where you can see the result of the capture.')
|
parser = argparse.ArgumentParser(description='Enqueue a URL on Lookyloo.', epilog='The response is the permanent URL where you can see the result of the capture.')
|
||||||
parser.add_argument('--url', type=str, help='URL of the instance (defaults to https://lookyloo.circl.lu/, the public instance).')
|
parser.add_argument('--url', type=str, help='URL of the instance (defaults to https://lookyloo.circl.lu/, the public instance).')
|
||||||
parser.add_argument('--query', required=True, help='URL to enqueue.')
|
parser.add_argument('--query', required=True, help='URL to enqueue.')
|
||||||
|
parser.add_argument('--listing', default=False, action='store_true', help='Should the report be publicly listed.')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.url:
|
if args.url:
|
||||||
|
@ -15,7 +16,7 @@ def main():
|
||||||
lookyloo = Lookyloo()
|
lookyloo = Lookyloo()
|
||||||
|
|
||||||
if lookyloo.is_up:
|
if lookyloo.is_up:
|
||||||
url = lookyloo.enqueue(args.query)
|
url = lookyloo.enqueue(args.query, listing=args.listing)
|
||||||
print(url)
|
print(url)
|
||||||
else:
|
else:
|
||||||
print(f'Unable to reach {lookyloo.root_url}. Is the server up?')
|
print(f'Unable to reach {lookyloo.root_url}. Is the server up?')
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import requests
|
from typing import Optional
|
||||||
import json
|
|
||||||
|
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
class Lookyloo():
|
class Lookyloo():
|
||||||
|
|
||||||
|
@ -20,6 +20,13 @@ class Lookyloo():
|
||||||
r = self.session.head(self.root_url)
|
r = self.session.head(self.root_url)
|
||||||
return r.status_code == 200
|
return r.status_code == 200
|
||||||
|
|
||||||
def enqueue(self, url: str) -> str:
|
def enqueue(self, url: Optional[str]=None, **kwargs) -> str:
|
||||||
response = self.session.post(urljoin(self.root_url, 'submit'), data=json.dumps({'url': url}))
|
if not url and 'url' not in kwargs:
|
||||||
|
raise Exception(f'url entry required: {kwargs}')
|
||||||
|
|
||||||
|
if url:
|
||||||
|
to_send = {'url': url, **kwargs}
|
||||||
|
else:
|
||||||
|
to_send = kwargs
|
||||||
|
response = self.session.post(urljoin(self.root_url, 'submit'), json=to_send)
|
||||||
return urljoin(self.root_url, f'tree/{response.text}')
|
return urljoin(self.root_url, f'tree/{response.text}')
|
||||||
|
|
|
@ -218,6 +218,10 @@ class Lookyloo():
|
||||||
def enqueue_scrape(self, query: dict) -> str:
|
def enqueue_scrape(self, query: dict) -> str:
|
||||||
perma_uuid = str(uuid4())
|
perma_uuid = str(uuid4())
|
||||||
p = self.redis.pipeline()
|
p = self.redis.pipeline()
|
||||||
|
for key, value in query.items():
|
||||||
|
if isinstance(value, bool):
|
||||||
|
# Yes, empty string because that's False.
|
||||||
|
query[key] = 1 if value else ''
|
||||||
p.hmset(perma_uuid, query)
|
p.hmset(perma_uuid, query)
|
||||||
p.sadd('to_scrape', perma_uuid)
|
p.sadd('to_scrape', perma_uuid)
|
||||||
p.execute()
|
p.execute()
|
||||||
|
@ -261,11 +265,6 @@ class Lookyloo():
|
||||||
except Har2TreeError as e:
|
except Har2TreeError as e:
|
||||||
raise NoValidHarFile(e.message)
|
raise NoValidHarFile(e.message)
|
||||||
|
|
||||||
def cleanup_old_tmpfiles(self):
|
|
||||||
for tmpfile in pathlib.Path(tempfile.gettempdir()).glob('lookyloo*'):
|
|
||||||
if time.time() - tmpfile.stat().st_atime > 36000:
|
|
||||||
tmpfile.unlink()
|
|
||||||
|
|
||||||
def load_image(self, capture_dir: Path) -> BytesIO:
|
def load_image(self, capture_dir: Path) -> BytesIO:
|
||||||
with open(list(capture_dir.glob('*.png'))[0], 'rb') as f:
|
with open(list(capture_dir.glob('*.png'))[0], 'rb') as f:
|
||||||
return BytesIO(f.read())
|
return BytesIO(f.read())
|
||||||
|
@ -275,8 +274,9 @@ class Lookyloo():
|
||||||
return self.sanejs.sha512(sha512)
|
return self.sanejs.sha512(sha512)
|
||||||
return {'response': []}
|
return {'response': []}
|
||||||
|
|
||||||
def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None, depth: int=1, listing: bool=True, user_agent: Optional[str]=None, perma_uuid: str=None,
|
def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
|
||||||
os: str=None, browser: str=None) -> Union[bool, str]:
|
depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
|
||||||
|
perma_uuid: str=None, os: str=None, browser: str=None) -> Union[bool, str]:
|
||||||
url = url.strip()
|
url = url.strip()
|
||||||
url = refang(url)
|
url = refang(url)
|
||||||
if not url.startswith('http'):
|
if not url.startswith('http'):
|
||||||
|
|
|
@ -197,7 +197,7 @@ description = "cryptography is a package which provides cryptographic recipes an
|
||||||
name = "cryptography"
|
name = "cryptography"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*"
|
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*"
|
||||||
version = "2.9"
|
version = "2.9.1"
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
cffi = ">=1.8,<1.11.3 || >1.11.3"
|
cffi = ">=1.8,<1.11.3 || >1.11.3"
|
||||||
|
@ -1216,25 +1216,7 @@ constantly = [
|
||||||
{file = "constantly-15.1.0.tar.gz", hash = "sha256:586372eb92059873e29eba4f9dec8381541b4d3834660707faf8ba59146dfc35"},
|
{file = "constantly-15.1.0.tar.gz", hash = "sha256:586372eb92059873e29eba4f9dec8381541b4d3834660707faf8ba59146dfc35"},
|
||||||
]
|
]
|
||||||
cryptography = [
|
cryptography = [
|
||||||
{file = "cryptography-2.9-cp27-cp27m-macosx_10_9_intel.whl", hash = "sha256:ef9a55013676907df6c9d7dd943eb1770d014f68beaa7e73250fb43c759f4585"},
|
{file = "cryptography-2.9.1.tar.gz", hash = "sha256:ce0bd68b4b946bd4bcebc3d4d1325bf0e938e445ae18cedddd60e33dd85a368e"},
|
||||||
{file = "cryptography-2.9-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:2a2ad24d43398d89f92209289f15265107928f22a8d10385f70def7a698d6a02"},
|
|
||||||
{file = "cryptography-2.9-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:95e1296e0157361fe2f5f0ed307fd31f94b0ca13372e3673fa95095a627636a1"},
|
|
||||||
{file = "cryptography-2.9-cp27-cp27m-win32.whl", hash = "sha256:192ca04a36852a994ef21df13cca4d822adbbdc9d5009c0f96f1d2929e375d4f"},
|
|
||||||
{file = "cryptography-2.9-cp27-cp27m-win_amd64.whl", hash = "sha256:ed1d0760c7e46436ec90834d6f10477ff09475c692ed1695329d324b2c5cd547"},
|
|
||||||
{file = "cryptography-2.9-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:19ae795137682a9778892fb4390c07811828b173741bce91e30f899424b3934d"},
|
|
||||||
{file = "cryptography-2.9-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d1bf5a1a0d60c7f9a78e448adcb99aa101f3f9588b16708044638881be15d6bc"},
|
|
||||||
{file = "cryptography-2.9-cp35-abi3-macosx_10_9_intel.whl", hash = "sha256:1b9b535d6b55936a79dbe4990b64bb16048f48747c76c29713fea8c50eca2acf"},
|
|
||||||
{file = "cryptography-2.9-cp35-abi3-manylinux1_x86_64.whl", hash = "sha256:7a279f33a081d436e90e91d1a7c338553c04e464de1c9302311a5e7e4b746088"},
|
|
||||||
{file = "cryptography-2.9-cp35-abi3-manylinux2010_x86_64.whl", hash = "sha256:49870684da168b90110bbaf86140d4681032c5e6a2461adc7afdd93be5634216"},
|
|
||||||
{file = "cryptography-2.9-cp35-cp35m-win32.whl", hash = "sha256:6b91cab3841b4c7cb70e4db1697c69f036c8bc0a253edc0baa6783154f1301e4"},
|
|
||||||
{file = "cryptography-2.9-cp35-cp35m-win_amd64.whl", hash = "sha256:587f98ce27ac4547177a0c6fe0986b8736058daffe9160dcf5f1bd411b7fbaa1"},
|
|
||||||
{file = "cryptography-2.9-cp36-cp36m-win32.whl", hash = "sha256:cc20316e3f5a6b582fc3b029d8dc03aabeb645acfcb7fc1d9848841a33265748"},
|
|
||||||
{file = "cryptography-2.9-cp36-cp36m-win_amd64.whl", hash = "sha256:3be7a5722d5bfe69894d3f7bbed15547b17619f3a88a318aab2e37f457524164"},
|
|
||||||
{file = "cryptography-2.9-cp37-cp37m-win32.whl", hash = "sha256:7598974f6879a338c785c513e7c5a4329fbc58b9f6b9a6305035fca5b1076552"},
|
|
||||||
{file = "cryptography-2.9-cp37-cp37m-win_amd64.whl", hash = "sha256:5aca6f00b2f42546b9bdf11a69f248d1881212ce5b9e2618b04935b87f6f82a1"},
|
|
||||||
{file = "cryptography-2.9-cp38-cp38-win32.whl", hash = "sha256:9fc9da390e98cb6975eadf251b6e5fa088820141061bf041cd5c72deba1dc526"},
|
|
||||||
{file = "cryptography-2.9-cp38-cp38-win_amd64.whl", hash = "sha256:6b744039b55988519cc183149cceb573189b3e46e16ccf6f8c46798bb767c9dc"},
|
|
||||||
{file = "cryptography-2.9.tar.gz", hash = "sha256:0cacd3ef5c604b8e5f59bf2582c076c98a37fe206b31430d0cd08138aff0986e"},
|
|
||||||
]
|
]
|
||||||
cssselect = [
|
cssselect = [
|
||||||
{file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"},
|
{file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"},
|
||||||
|
|
|
@ -260,7 +260,6 @@ def index():
|
||||||
if request.method == 'HEAD':
|
if request.method == 'HEAD':
|
||||||
# Just returns ack if the webserver is running
|
# Just returns ack if the webserver is running
|
||||||
return 'Ack'
|
return 'Ack'
|
||||||
lookyloo.cleanup_old_tmpfiles()
|
|
||||||
update_user_agents()
|
update_user_agents()
|
||||||
titles = []
|
titles = []
|
||||||
for capture_dir in lookyloo.capture_dirs:
|
for capture_dir in lookyloo.capture_dirs:
|
||||||
|
|
Loading…
Reference in New Issue