#!/usr/bin/env python3

import json
import logging
import os
import pickle
import sys
import time

from collections.abc import Mapping
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union

import dns.rdatatype
import dns.resolver

from har2tree import CrawledTree, Har2TreeError, HarFile
from redis import Redis

from .context import Context
from .helpers import get_captures_dir
from .indexing import Indexing
from .default import LookylooException, get_config, try_make_file
from .exceptions import MissingCaptureDirectory, MissingUUID, NoValidHarFile, TreeNeedsRebuild


class CaptureCache():
    __slots__ = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir',
                 'error', 'incomplete_redirects', 'no_index', 'categories', 'parent')

    def __init__(self, cache_entry: Dict[str, Any]):
        __default_cache_keys: Tuple[str, str, str, str, str, str] = ('uuid', 'title', 'timestamp',
                                                                     'url', 'redirects', 'capture_dir')
        if 'uuid' not in cache_entry or 'capture_dir' not in cache_entry:
            raise LookylooException(f'The capture is deeply broken: {cache_entry}')
        self.uuid: str = cache_entry['uuid']
        self.capture_dir: Path = Path(cache_entry['capture_dir'])

        if all(key in cache_entry.keys() for key in __default_cache_keys):
            self.title: str = cache_entry['title']
            try:
                self.timestamp: datetime = datetime.strptime(cache_entry['timestamp'], '%Y-%m-%dT%H:%M:%S.%f%z')
            except ValueError:
                # The format above fails to parse if the microseconds are missing (== 0).
                self.timestamp = datetime.strptime(cache_entry['timestamp'], '%Y-%m-%dT%H:%M:%S%z')
            self.url: str = cache_entry['url']
            self.redirects: List[str] = json.loads(cache_entry['redirects'])
            if not self.capture_dir.exists():
                raise MissingCaptureDirectory(f'The capture {self.uuid} does not exist in {self.capture_dir}.')
        elif not cache_entry.get('error'):
            missing = set(__default_cache_keys) - set(cache_entry.keys())
            raise LookylooException(f'Missing keys ({missing}), no error message. It should not happen.')

        # An error entry missing keys from __default_cache_keys is fatal (handled above);
        # if all the keys in __default_cache_keys are present, it was an HTTP error.
        self.error: Optional[str] = cache_entry.get('error')
        self.incomplete_redirects: bool = cache_entry.get('incomplete_redirects') in [1, '1']
        self.no_index: bool = cache_entry.get('no_index') in [1, '1']
        self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
        self.parent: Optional[str] = cache_entry.get('parent')

    @property
    def tree(self) -> CrawledTree:
        return load_pickle_tree(self.capture_dir, self.capture_dir.stat().st_mtime)
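

# A quick sketch of what CaptureCache.__init__ expects (hypothetical values;
# the real entries come from the redis hashes written by CapturesIndex below).
# 'redirects' is a JSON-encoded list, and 'timestamp' is accepted with or
# without microseconds:
#
#   entry = {'uuid': 'deadbeef-1234-5678-9abc-def012345678',
#            'title': 'Example page',
#            'timestamp': '2021-09-30T15:38:25+00:00',  # or ...:25.123456+00:00
#            'url': 'https://www.example.com',
#            'redirects': '["https://www.example.com/landing"]',
#            'capture_dir': '/path/to/captures/deadbeef'}
#   cc = CaptureCache(entry)  # raises MissingCaptureDirectory if the dir is gone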


def remove_pickle_tree(capture_dir: Path) -> None:
    pickle_file = capture_dir / 'tree.pickle'
    if pickle_file.exists():
        pickle_file.unlink()


@lru_cache(maxsize=256)
def load_pickle_tree(capture_dir: Path, last_mod_time: float) -> CrawledTree:
    pickle_file = capture_dir / 'tree.pickle'
    if pickle_file.exists():
        with pickle_file.open('rb') as _p:
            try:
                tree = pickle.load(_p)
                if tree.root_hartree.har.path.exists():
                    return tree
                # The capture was moved, the pickled tree points to a stale path.
                remove_pickle_tree(capture_dir)
            except Exception:
                # Covers pickle.UnpicklingError, EOFError, and anything else:
                # the pickle is unusable, drop it so it gets rebuilt.
                remove_pickle_tree(capture_dir)

    if list(capture_dir.rglob('*.har')):
        raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.')
    # Without HAR files, there is nothing to rebuild the tree from.
    raise NoValidHarFile("Couldn't find HAR files")
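

# load_pickle_tree is memoized on the (capture_dir, last_mod_time) pair, so
# callers pass the directory's current mtime: any change on disk produces a
# new cache key and transparently skips the stale memoized tree. A minimal
# usage sketch (hypothetical path):
#
#   capture_dir = Path('/path/to/captures/deadbeef')
#   tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)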


class CapturesIndex(Mapping):

    def __init__(self, redis: Redis, contextualizer: Optional[Context]=None):
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(get_config('generic', 'loglevel'))
        self.redis = redis
        self.indexing = Indexing()
        self.contextualizer = contextualizer
        self.__cache: Dict[str, CaptureCache] = {}
        self._quick_init()

    @property
    def cached_captures(self) -> Set[str]:
        return set(self.__cache.keys())

    def __getitem__(self, uuid: str) -> CaptureCache:
        if uuid in self.__cache:
            if (self.__cache[uuid].capture_dir.exists()
                    and not self.__cache[uuid].incomplete_redirects):
                return self.__cache[uuid]
            # The directory is gone or the redirects are incomplete: re-cache.
            del self.__cache[uuid]
        capture_dir = self._get_capture_dir(uuid)
        cached = self.redis.hgetall(capture_dir)
        if cached:
            cc = CaptureCache(cached)
            # NOTE: checking for the pickle to exist may be a bad idea here.
            if (cc.capture_dir.exists()
                    and (cc.capture_dir / 'tree.pickle').exists()
                    and not cc.incomplete_redirects):
                self.__cache[uuid] = cc
                return self.__cache[uuid]

        self.__cache[uuid] = self._set_capture_cache(capture_dir)
        return self.__cache[uuid]

    def __iter__(self):
        return iter(self.__cache)

    def __len__(self):
        return len(self.__cache)
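
    # Subclassing Mapping means only __getitem__, __iter__ and __len__ are
    # defined here; keys(), values(), items() and friends come for free from
    # collections.abc. A hypothetical lookup (the redis connection settings
    # are assumptions, the hashes must contain decoded strings):
    #
    #   index = CapturesIndex(Redis(decode_responses=True))
    #   for uuid in index:           # iterates over the in-memory cache
    #       print(index[uuid].url)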

    def reload_cache(self, uuid: str) -> None:
        if uuid in self.__cache:
            self.redis.delete(str(self.__cache[uuid].capture_dir))
            del self.__cache[uuid]

    def remove_pickle(self, uuid: str) -> None:
        if uuid in self.__cache:
            remove_pickle_tree(self.__cache[uuid].capture_dir)
            del self.__cache[uuid]

    def rebuild_all(self) -> None:
        for cache in self.__cache.values():
            remove_pickle_tree(cache.capture_dir)
        self.redis.flushdb()
        self.__cache = {}

    def lru_cache_status(self):
        return load_pickle_tree.cache_info()

    def _quick_init(self) -> None:
        '''Initialize the cache with a list of UUIDs, with less back and forth with redis.
        Only gets the recent captures.'''
        p = self.redis.pipeline()
        for directory in self.redis.hvals('lookup_dirs'):
            p.hgetall(directory)
        for cache in p.execute():
            if not cache:
                continue
            try:
                cc = CaptureCache(cache)
            except LookylooException as e:
                self.logger.warning(f'Unable to initialize the cache: {e}')
                continue
            self.__cache[cc.uuid] = cc
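
    # The redis layout _quick_init and _get_capture_dir rely on: 'lookup_dirs'
    # and 'lookup_dirs_archived' are hashes mapping a capture UUID to its
    # directory, and each directory path is itself the key of a hash holding
    # the cached metadata written by _set_capture_cache, e.g.:
    #
    #   HGET lookup_dirs <uuid>           -> /path/to/captures/<uuid>
    #   HGETALL /path/to/captures/<uuid>  -> {uuid, title, timestamp, url, ...}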

    def _get_capture_dir(self, uuid: str) -> str:
        # Try to get the directory from the recent captures cache in redis.
        capture_dir = self.redis.hget('lookup_dirs', uuid)
        if capture_dir:
            if os.path.exists(capture_dir):
                return capture_dir
            # The capture was either removed or archived, clean up the mapping.
            self.redis.hdel('lookup_dirs', uuid)
            self.redis.delete(capture_dir)

        # Try to get the directory from the archived captures cache in redis.
        capture_dir = self.redis.hget('lookup_dirs_archived', uuid)
        if capture_dir:
            if os.path.exists(capture_dir):
                return capture_dir
            # The capture was removed, remove the UUID everywhere.
            self.redis.hdel('lookup_dirs_archived', uuid)
            self.redis.delete(capture_dir)
            self.logger.warning(f'UUID ({uuid}) linked to a missing directory ({capture_dir}).')
            raise MissingCaptureDirectory(f'UUID ({uuid}) linked to a missing directory ({capture_dir}).')
        raise MissingUUID(f'Unable to find UUID {uuid}.')

    def _create_pickle(self, capture_dir: Path) -> CrawledTree:
        with (capture_dir / 'uuid').open() as f:
            uuid = f.read().strip()

        lock_file = capture_dir / 'lock'
        if try_make_file(lock_file):
            # Lock acquired, we can build the tree.
            with lock_file.open('w') as f:
                f.write(datetime.now().isoformat())
        else:
            # The pickle is being created somewhere else, wait until it's done.
            while lock_file.exists():
                time.sleep(5)
            return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)

        har_files = sorted(capture_dir.glob('*.har'))
        try:
            tree = CrawledTree(har_files, uuid)
            self.__resolve_dns(tree)
            if self.contextualizer:
                self.contextualizer.contextualize_tree(tree)
        except Har2TreeError as e:
            # Unable to use the HAR files, get them out of the way.
            for har_file in har_files:
                har_file.rename(har_file.with_suffix('.broken'))
            raise NoValidHarFile(f'We got har files, but they are broken: {e}')
        except RecursionError as e:
            raise NoValidHarFile(f'Tree too deep, probably a recursive refresh: {e}.\n Append /export to the URL to get the files.')
        else:
            with (capture_dir / 'tree.pickle').open('wb') as _p:
                # Some pickles require a pretty high recursion limit, this kind of fixes it.
                # If the capture is really broken (generally a refresh to self), the capture
                # is discarded in the RecursionError handler above.
                default_recursion_limit = sys.getrecursionlimit()
                sys.setrecursionlimit(int(default_recursion_limit * 1.1))
                try:
                    pickle.dump(tree, _p)
                except RecursionError as e:
                    raise NoValidHarFile(f'Tree too deep, probably a recursive refresh: {e}.\n Append /export to the URL to get the files.')
                finally:
                    # Restore the limit even if dumping failed.
                    sys.setrecursionlimit(default_recursion_limit)
        finally:
            lock_file.unlink(missing_ok=True)
        return tree
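
    # The lock handling in _create_pickle is cooperative file locking:
    # try_make_file is expected to atomically create the lock file and return
    # False when it already exists. A sketch of that pattern (an assumption
    # about the helper implemented in .default, shown for clarity):
    #
    #   def try_make_file(path: Path) -> bool:
    #       try:
    #           path.touch(exist_ok=False)  # atomic create, fails if present
    #           return True
    #       except FileExistsError:
    #           return False
    #
    # The loser of the race polls until the winner removes the lock in the
    # `finally` clause, then loads the pickle the winner just produced.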

    def _set_capture_cache(self, capture_dir_str: str) -> CaptureCache:
        '''Populate the redis cache for a capture. Mostly used on the index page.
        NOTE: Doesn't require the pickle.'''
        capture_dir = Path(capture_dir_str)
        with (capture_dir / 'uuid').open() as f:
            uuid = f.read().strip()

        try:
            tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
        except TreeNeedsRebuild:
            try:
                tree = self._create_pickle(capture_dir)
                self.indexing.new_internal_uuids(tree)
            except NoValidHarFile:
                # If the rebuild failed, _create_pickle renamed the HAR files
                # to *.broken, so the HAR lookup below finds nothing and `tree`
                # is never referenced.
                self.logger.warning('Unable to rebuild the tree, the HAR files are broken.')

        cache: Dict[str, Union[str, int]] = {'uuid': uuid, 'capture_dir': capture_dir_str}
        if (capture_dir / 'error.txt').exists():
            # Something went wrong
            with (capture_dir / 'error.txt').open() as _error:
                content = _error.read()
                try:
                    error_to_cache = json.loads(content)
                    if isinstance(error_to_cache, dict) and error_to_cache.get('details'):
                        error_to_cache = error_to_cache.get('details')
                except json.decoder.JSONDecodeError:
                    # old format
                    error_to_cache = content
                cache['error'] = f'The capture {capture_dir.name} has an error: {error_to_cache}'

        if (har_files := sorted(capture_dir.rglob('*.har'))):
            try:
                har = HarFile(har_files[0], uuid)
                cache['title'] = har.initial_title
                cache['timestamp'] = har.initial_start_time
                cache['url'] = har.root_url
                cache['redirects'] = json.dumps(tree.redirects)
                cache['incomplete_redirects'] = 0
            except Har2TreeError as e:
                cache['error'] = str(e)
        else:
            if 'error' not in cache:
                cache['error'] = f'No har files in {capture_dir.name}'

        if (cache.get('error')
                and isinstance(cache['error'], str)
                and 'HTTP Error' not in cache['error']):
            self.logger.warning(cache['error'])

        if (capture_dir / 'categories').exists():
            with (capture_dir / 'categories').open() as _categories:
                cache['categories'] = json.dumps([c.strip() for c in _categories.readlines()])

        if (capture_dir / 'no_index').exists():
            # If the folder claims anonymity
            cache['no_index'] = 1

        if (capture_dir / 'parent').exists():
            # The capture was initiated from another one
            with (capture_dir / 'parent').open() as f:
                cache['parent'] = f.read().strip()

        p = self.redis.pipeline()
        # if capture_dir.is_relative_to(get_captures_dir()):  # Requires python 3.9
        if capture_dir_str.startswith(str(get_captures_dir())):
            p.hset('lookup_dirs', uuid, capture_dir_str)
        else:
            p.hset('lookup_dirs_archived', uuid, capture_dir_str)

        p.hset(capture_dir_str, mapping=cache)  # type: ignore
        p.execute()
        return CaptureCache(cache)

    def __resolve_dns(self, ct: CrawledTree):
        '''Resolves all the domains of the tree, keeps A (IPv4), AAAA (IPv6), and CNAME
        entries, and stores them in ips.json and cnames.json in the capture directory.
        Updates the nodes of the tree accordingly so the information is available.
        '''

        def _build_cname_chain(known_cnames: Dict[str, str], hostname: str) -> List[str]:
            '''Returns the list of CNAMEs starting from one hostname.
            The CNAME resolutions are made in `__resolve_dns`. A hostname can have a CNAME
            entry, and that CNAME entry can have another CNAME entry, and so on, multiple
            times. This method loops over the hostnames until there are no more CNAMEs.'''
            cnames: List[str] = []
            to_search = hostname
            while known_cnames.get(to_search):
                cnames.append(known_cnames[to_search])
                to_search = known_cnames[to_search]
            return cnames
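
        # For example (hypothetical records), with
        #   known_cnames = {'www.example.com': 'cdn.example.net',
        #                   'cdn.example.net': 'edge.example.org'}
        # _build_cname_chain(known_cnames, 'www.example.com') returns
        # ['cdn.example.net', 'edge.example.org'], and host_ips is then looked
        # up with the last element of the chain.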

        cnames_path = ct.root_hartree.har.path.parent / 'cnames.json'
        ips_path = ct.root_hartree.har.path.parent / 'ips.json'
        host_cnames: Dict[str, str] = {}
        if cnames_path.exists():
            try:
                with cnames_path.open() as f:
                    host_cnames = json.load(f)
            except json.decoder.JSONDecodeError:
                # The json is broken: start from scratch and re-trigger the resolutions.
                host_cnames = {}

        host_ips: Dict[str, List[str]] = {}
        if ips_path.exists():
            try:
                with ips_path.open() as f:
                    host_ips = json.load(f)
            except json.decoder.JSONDecodeError:
                # The json is broken: start from scratch and re-trigger the resolutions.
                host_ips = {}

        for node in ct.root_hartree.hostname_tree.traverse():
            if node.name not in host_cnames or node.name not in host_ips:
                # Resolve and cache the response.
                try:
                    response = dns.resolver.resolve(node.name, search=True)
                    for answer in response.response.answer:
                        if answer.rdtype == dns.rdatatype.RdataType.CNAME:
                            host_cnames[str(answer.name).rstrip('.')] = str(answer[0].target).rstrip('.')
                        else:
                            host_cnames[str(answer.name).rstrip('.')] = ''

                        if answer.rdtype in [dns.rdatatype.RdataType.A, dns.rdatatype.RdataType.AAAA]:
                            host_ips[str(answer.name).rstrip('.')] = list({str(b) for b in answer})
                except Exception:
                    # Resolution failed (NXDOMAIN, timeout, ...): cache the miss
                    # so it isn't retried on every tree rebuild.
                    host_cnames[node.name] = ''
                    host_ips[node.name] = []
            if (cnames := _build_cname_chain(host_cnames, node.name)):
                node.add_feature('cname', cnames)
                if cnames[-1] in host_ips:
                    node.add_feature('resolved_ips', host_ips[cnames[-1]])
            elif node.name in host_ips:
                node.add_feature('resolved_ips', host_ips[node.name])

        with cnames_path.open('w') as f:
            json.dump(host_cnames, f)
        with ips_path.open('w') as f:
            json.dump(host_ips, f)
        return ct
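
        # The two json files written above act as a per-capture DNS cache, e.g.
        # (hypothetical content, documentation addresses):
        #   cnames.json: {"www.example.com": "cdn.example.net", "cdn.example.net": ""}
        #   ips.json:    {"cdn.example.net": ["192.0.2.10", "2001:db8::10"]}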