mirror of https://github.com/CIRCL/lookyloo
parent
07dd01a80b
commit
902c8f81b6
|
@ -54,15 +54,26 @@ class AsyncCapture(AbstractManager):
|
||||||
lazy_cleanup.zincrby('queues', -1, queue)
|
lazy_cleanup.zincrby('queues', -1, queue)
|
||||||
|
|
||||||
to_capture: Dict[str, str] = self.redis.hgetall(uuid)
|
to_capture: Dict[str, str] = self.redis.hgetall(uuid)
|
||||||
to_capture['perma_uuid'] = uuid
|
|
||||||
if 'cookies' in to_capture:
|
|
||||||
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
|
|
||||||
|
|
||||||
self.logger.info(f'Capturing {to_capture["url"]} - {uuid}')
|
self.logger.info(f'Capturing {to_capture["url"]} - {uuid}')
|
||||||
if self._capture(**to_capture): # type: ignore
|
success, error_message = self._capture(
|
||||||
|
to_capture['url'],
|
||||||
|
perma_uuid=uuid,
|
||||||
|
cookies_pseudofile=to_capture.get('cookies', None),
|
||||||
|
depth=int(to_capture.get('depth', 1)),
|
||||||
|
listing=True if ('listing' in to_capture and to_capture['listing'] is True) else False,
|
||||||
|
user_agent=to_capture.get('user_agent', None),
|
||||||
|
referer=to_capture.get('referer', ''),
|
||||||
|
proxy=to_capture.get('proxy', ''),
|
||||||
|
os=to_capture.get('os', None),
|
||||||
|
browser=to_capture.get('browser', None),
|
||||||
|
parent=to_capture.get('parent', None)
|
||||||
|
)
|
||||||
|
if success:
|
||||||
self.logger.info(f'Successfully captured {to_capture["url"]} - {uuid}')
|
self.logger.info(f'Successfully captured {to_capture["url"]} - {uuid}')
|
||||||
else:
|
else:
|
||||||
self.logger.warning(f'Unable to capture {to_capture["url"]} - {uuid}')
|
self.logger.warning(f'Unable to capture {to_capture["url"]} - {uuid}: {error_message}')
|
||||||
|
lazy_cleanup.setex(f'error_{uuid}', 36000, f'{error_message} - {to_capture["url"]} - {uuid}')
|
||||||
lazy_cleanup.srem('ongoing', uuid)
|
lazy_cleanup.srem('ongoing', uuid)
|
||||||
lazy_cleanup.delete(uuid)
|
lazy_cleanup.delete(uuid)
|
||||||
# make sure to expire the key if nothing was processed for a while (= queues empty)
|
# make sure to expire the key if nothing was processed for a while (= queues empty)
|
||||||
|
@ -72,7 +83,7 @@ class AsyncCapture(AbstractManager):
|
||||||
def _capture(self, url: str, *, perma_uuid: str, cookies_pseudofile: Optional[Union[BufferedIOBase, str]]=None,
|
def _capture(self, url: str, *, perma_uuid: str, cookies_pseudofile: Optional[Union[BufferedIOBase, str]]=None,
|
||||||
depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
|
depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
|
||||||
referer: str='', proxy: str='', os: Optional[str]=None,
|
referer: str='', proxy: str='', os: Optional[str]=None,
|
||||||
browser: Optional[str]=None, parent: Optional[str]=None) -> bool:
|
browser: Optional[str]=None, parent: Optional[str]=None) -> Tuple[bool, str]:
|
||||||
'''Launch a capture'''
|
'''Launch a capture'''
|
||||||
url = url.strip()
|
url = url.strip()
|
||||||
url = refang(url)
|
url = refang(url)
|
||||||
|
@ -87,11 +98,11 @@ class AsyncCapture(AbstractManager):
|
||||||
ip = socket.gethostbyname(splitted_url.hostname)
|
ip = socket.gethostbyname(splitted_url.hostname)
|
||||||
except socket.gaierror:
|
except socket.gaierror:
|
||||||
self.logger.info('Name or service not known')
|
self.logger.info('Name or service not known')
|
||||||
return False
|
return False, 'Name or service not known.'
|
||||||
if not ipaddress.ip_address(ip).is_global:
|
if not ipaddress.ip_address(ip).is_global:
|
||||||
return False
|
return False, 'Capturing ressources on private IPs is disabled.'
|
||||||
else:
|
else:
|
||||||
return False
|
return False, 'Unable to find hostname or IP in the query.'
|
||||||
|
|
||||||
cookies = load_cookies(cookies_pseudofile)
|
cookies = load_cookies(cookies_pseudofile)
|
||||||
if not user_agent:
|
if not user_agent:
|
||||||
|
@ -113,7 +124,7 @@ class AsyncCapture(AbstractManager):
|
||||||
if not items:
|
if not items:
|
||||||
# broken
|
# broken
|
||||||
self.logger.critical(f'Something went terribly wrong when capturing {url}.')
|
self.logger.critical(f'Something went terribly wrong when capturing {url}.')
|
||||||
return False
|
return False, 'Something went terribly wrong when capturing {url}.'
|
||||||
width = len(str(len(items)))
|
width = len(str(len(items)))
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
dirpath = self.capture_dir / str(now.year) / f'{now.month:02}' / now.isoformat()
|
dirpath = self.capture_dir / str(now.year) / f'{now.month:02}' / now.isoformat()
|
||||||
|
@ -171,7 +182,7 @@ class AsyncCapture(AbstractManager):
|
||||||
with (dirpath / '{0:0{width}}.cookies.json'.format(i, width=width)).open('w') as _cookies:
|
with (dirpath / '{0:0{width}}.cookies.json'.format(i, width=width)).open('w') as _cookies:
|
||||||
json.dump(cookies, _cookies)
|
json.dump(cookies, _cookies)
|
||||||
self.redis.hset('lookup_dirs', perma_uuid, str(dirpath))
|
self.redis.hset('lookup_dirs', perma_uuid, str(dirpath))
|
||||||
return True
|
return True, 'All good!'
|
||||||
|
|
||||||
def _to_run_forever(self):
|
def _to_run_forever(self):
|
||||||
while self.redis.exists('to_capture'):
|
while self.redis.exists('to_capture'):
|
||||||
|
|
|
@ -578,6 +578,9 @@ class Lookyloo():
|
||||||
return CaptureStatus.ONGOING
|
return CaptureStatus.ONGOING
|
||||||
return CaptureStatus.UNKNOWN
|
return CaptureStatus.UNKNOWN
|
||||||
|
|
||||||
|
def try_error_status(self, capture_uuid: str, /) -> Optional[str]:
    """Look up the error entry stored in redis for a capture.

    Reads the key ``error_<capture_uuid>`` through ``self.redis`` and hands
    back whatever the client returns for it, unchanged.
    """
    error_key = f'error_{capture_uuid}'
    stored_error = self.redis.get(error_key)
    return stored_error
|
||||||
|
|
||||||
def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]:
|
def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]:
|
||||||
"""Get the cache from redis."""
|
"""Get the cache from redis."""
|
||||||
if capture_uuid in self._captures_index and not self._captures_index[capture_uuid].incomplete_redirects:
|
if capture_uuid in self._captures_index and not self._captures_index[capture_uuid].incomplete_redirects:
|
||||||
|
|
|
@ -562,7 +562,11 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
|
||||||
flash(f'The capture module is not reachable ({splash_message}).', 'error')
|
flash(f'The capture module is not reachable ({splash_message}).', 'error')
|
||||||
flash('The request will be enqueued, but capturing may take a while and require the administrator to wake up.', 'error')
|
flash('The request will be enqueued, but capturing may take a while and require the administrator to wake up.', 'error')
|
||||||
if status == CaptureStatus.UNKNOWN:
|
if status == CaptureStatus.UNKNOWN:
|
||||||
flash(f'Unable to find this UUID ({tree_uuid}).', 'error')
|
error = lookyloo.try_error_status(tree_uuid)
|
||||||
|
if error:
|
||||||
|
flash(error, 'error')
|
||||||
|
else:
|
||||||
|
flash(f'Unable to find this UUID ({tree_uuid}).', 'error')
|
||||||
return redirect(url_for('index'))
|
return redirect(url_for('index'))
|
||||||
elif status == CaptureStatus.QUEUED:
|
elif status == CaptureStatus.QUEUED:
|
||||||
message = "The capture is queued, but didn't start yet."
|
message = "The capture is queued, but didn't start yet."
|
||||||
|
@ -572,10 +576,6 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
|
||||||
message = "The capture is ongoing."
|
message = "The capture is ongoing."
|
||||||
return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)
|
return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)
|
||||||
|
|
||||||
if not cache:
|
|
||||||
flash('Invalid cache.', 'error')
|
|
||||||
return redirect(url_for('index'))
|
|
||||||
|
|
||||||
if cache.error:
|
if cache.error:
|
||||||
flash(cache.error, 'error')
|
flash(cache.error, 'error')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue