#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from pathlib import Path
from typing import Optional
import logging

from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import get_homedir, set_running, unset_running, shutdown_requested
from lookyloo.lookyloo import Lookyloo

logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                    level=logging.INFO, datefmt='%I:%M:%S')

# Set to True if your instance is publicly available, so users cannot use it to scan your internal network.
only_global_lookups = False


class AsyncScraper(AbstractManager):
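    """Manager that processes Lookyloo's scrape queue in the background."""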
    def __init__(self, storage_directory: Optional[Path] = None, loglevel: int = logging.INFO):
        super().__init__(loglevel)
        if not storage_directory:
            self.storage_directory = get_homedir() / 'scraped'
        else:
            self.storage_directory = storage_directory
        self.lookyloo = Lookyloo(loglevel=loglevel, only_global_lookups=only_global_lookups)

    def _to_run_forever(self):
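        # Runs on each pass of the AbstractManager loop (see m.run() below).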
        set_running('async_scrape')
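        # Process queued scrape requests until the queue is empty or a shutdown was requested.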
        while True:
            url = self.lookyloo.process_scrape_queue()
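            # Stop once the queue yields no URL, or when a shutdown was requested.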
            if url is None or shutdown_requested():
                break
        unset_running('async_scrape')


if __name__ == '__main__':
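    # Start the manager; sleep_in_sec=1 presumably has the run loop sleep one second between passes.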
    m = AsyncScraper()
    m.run(sleep_in_sec=1)