2019-01-30 16:01:55 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2020-06-26 18:11:22 +02:00
|
|
|
from typing import Optional, Dict, Any
|
2019-01-30 16:01:55 +01:00
|
|
|
from urllib.parse import urljoin
|
2020-06-26 18:11:22 +02:00
|
|
|
from pathlib import Path
|
2019-01-30 16:01:55 +01:00
|
|
|
|
2020-04-21 18:41:57 +02:00
|
|
|
import requests
|
|
|
|
|
2019-01-30 16:01:55 +01:00
|
|
|
|
|
|
|
class Lookyloo():
    '''Thin HTTP client for a Lookyloo web instance.

    All requests go through a single `requests` session and are resolved
    relative to `root_url`.
    '''

    def __init__(self, root_url: str='https://lookyloo.circl.lu/'):
        '''Prepare a client for the instance reachable at `root_url`.

        :param root_url: Base URL of the instance. A trailing slash is appended
                         when missing so that relative paths joined to it are
                         appended instead of replacing its last path segment.
        '''
        self.root_url = root_url
        if not self.root_url.endswith('/'):
            self.root_url += '/'
        self.session = requests.Session()

    @property
    def is_up(self) -> bool:
        '''Tell whether the instance answers a HEAD request on `root_url` with HTTP 200.

        An instance that cannot be reached at all is reported as down (False)
        rather than raising a connection error.
        '''
        try:
            r = self.session.head(self.root_url)
        except requests.exceptions.ConnectionError:
            return False
        return r.status_code == 200

    def enqueue(self, url: Optional[str]=None, quiet: bool=False, **kwargs) -> str:
        '''Enqueue an URL.

        :param url: URL to enqueue
        :param quiet: Returns the UUID only, instead of the whole URL
        :param kwargs: accepts all the parameters supported by `Lookyloo.scrape`

        :return: The body of the response to the submission (used as the
                 capture UUID) when `quiet` is set, otherwise the URL of the
                 capture tree on the instance.
        :raises Exception: if no URL is given, neither as `url` nor in `kwargs`.
        '''
        if not url and 'url' not in kwargs:
            raise Exception(f'url entry required: {kwargs}')

        # An explicit `url` argument is merged into the payload; otherwise
        # the URL is expected to be among the keyword arguments already.
        to_send = {'url': url, **kwargs} if url else kwargs
        response = self.session.post(urljoin(self.root_url, 'submit'), json=to_send)
        if quiet:
            return response.text
        return urljoin(self.root_url, f'tree/{response.text}')

    def get_redirects(self, capture_uuid: str) -> Dict[str, Any]:
        '''Fetch the redirects recorded for a capture.

        :param capture_uuid: UUID of the capture, as returned by `enqueue`.
        :return: The decoded JSON body of the response.
        '''
        # URL paths always use forward slashes: build the path explicitly
        # instead of going through the OS-dependent separator of pathlib.Path.
        r = self.session.get(urljoin(self.root_url, f'json/{capture_uuid}/redirects'))
        return r.json()
|