diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index fab7ed8a..f4736ee5 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -22,11 +22,6 @@ jobs:
         # a pull request then we can checkout the head.
         fetch-depth: 2
 
-    # If this run was triggered by a pull request event, then checkout
-    # the head of the pull request instead of the merge commit.
-    - run: git checkout HEAD^2
-      if: ${{ github.event_name == 'pull_request' }}
-
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
       uses: github/codeql-action/init@v1
diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py
index 2cb77ebe..49d65561 100644
--- a/lookyloo/capturecache.py
+++ b/lookyloo/capturecache.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-from dataclasses import dataclass
 from datetime import datetime
 import json
 from pathlib import Path
@@ -10,13 +9,13 @@ from typing import Any, Dict, List, Optional, Tuple
 
 from .exceptions import LookylooException
 
-@dataclass
 class CaptureCache():
-    __default_cache_keys: Tuple[str, str, str, str, str, str] = \
-        ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir')
+    __slots__ = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir',
+                 'error', 'incomplete_redirects', 'no_index', 'categories', 'parent')
 
     def __init__(self, cache_entry: Dict[str, Any]):
-        if all(key in cache_entry.keys() for key in self.__default_cache_keys):
+        __default_cache_keys: Tuple[str, str, str, str, str, str] = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir')
+        if all(key in cache_entry.keys() for key in __default_cache_keys):
             self.uuid: str = cache_entry['uuid']
             self.title: str = cache_entry['title']
             self.timestamp: datetime = datetime.strptime(cache_entry['timestamp'], '%Y-%m-%dT%H:%M:%S.%f%z')
@@ -24,7 +23,7 @@ class CaptureCache():
             self.redirects: List[str] = json.loads(cache_entry['redirects'])
             self.capture_dir: Path = Path(cache_entry['capture_dir'])
         elif not cache_entry.get('error'):
-            missing = set(self.__default_cache_keys) - set(cache_entry.keys())
+            missing = set(__default_cache_keys) - set(cache_entry.keys())
             raise LookylooException(f'Missing keys ({missing}), no error message. It should not happen.')
 
         # Error without all the keys in __default_cache_keys was fatal.
diff --git a/lookyloo/context.py b/lookyloo/context.py
index a09507f0..d0c872de 100644
--- a/lookyloo/context.py
+++ b/lookyloo/context.py
@@ -33,7 +33,7 @@ class Context():
             p = self.redis.pipeline()
             if filename == 'generic':
                 # 1px images, files with spaces, empty => non-relevant stuff
-                for k, type_content in file_content.items():
+                for _, type_content in file_content.items():
                     p.hmset('known_content', {h: type_content['description'] for h in type_content['entries']})
             elif filename == 'malicious':
                 # User defined as malicious
@@ -133,7 +133,7 @@ class Context():
                     # this is the hash of an embeded content so it won't have a filename but has a different mimetype
                     # FIXME: this is ugly.
                     for ressource_mimetype, blobs in urlnode.embedded_ressources.items():
-                        for ressource_h, b in blobs:
+                        for ressource_h, _ in blobs:
                             if ressource_h == h:
                                 mimetype = ressource_mimetype.split(';')[0]
                                 break
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index 3efc2200..6406360f 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -128,7 +128,7 @@ class Lookyloo():
 
         to_store: Dict[str, Any] = {'by_frequency': []}
         uas = Counter([entry.split('|', 1)[1] for entry in entries])
-        for ua, count in uas.most_common():
+        for ua, _ in uas.most_common():
             parsed_ua = UserAgent(ua)
             if not parsed_ua.platform or not parsed_ua.browser:
                 continue
@@ -191,7 +191,7 @@ class Lookyloo():
             categories = list(self.categories_capture(capture_uuid).keys())
             self.indexing.index_categories_capture(capture_uuid, categories)
         except Har2TreeError as e:
-            raise NoValidHarFile(e.message)
+            raise NoValidHarFile(e)
         except RecursionError as e:
             raise NoValidHarFile(f'Tree too deep, probably a recursive refresh: {e}.\n Append /export to the URL to get the files.')
         else:
@@ -471,7 +471,7 @@ class Lookyloo():
             try:
                 har = HarFile(har_files[0], uuid)
             except Har2TreeError as e:
-                error_cache['error'] = e.message
+                error_cache['error'] = str(e)
                 fatal_error = True
         else:
             error_cache['error'] = f'No har files in {capture_dir.name}'
@@ -541,7 +541,7 @@ class Lookyloo():
         '''All the capture UUIDs present in the cache.'''
         return self.redis.hkeys('lookup_dirs')
 
-    def sorted_capture_cache(self, capture_uuids: Iterable[str]=[]) -> List[CaptureCache]:
+    def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None) -> List[CaptureCache]:
         '''Get all the captures in the cache, sorted by timestamp (new -> old).'''
         if not capture_uuids:
             # Sort all captures
@@ -550,7 +550,7 @@ class Lookyloo():
                 # No captures at all on the instance
                 return []
 
-        all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids if uuid in self._captures_index]
+        all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids if uuid in self._captures_index and not self._captures_index[uuid].incomplete_redirects]
 
         captures_to_get = set(capture_uuids) - set(self._captures_index.keys())
         if captures_to_get:
@@ -954,7 +954,7 @@ class Lookyloo():
         details = self.indexing.get_body_hash_urls(body_hash)
         body_content = BytesIO()
         # get the body from the first entry in the details list
-        for url, entries in details.items():
+        for _, entries in details.items():
             ct = self.get_crawled_tree(entries[0]['capture'])
             urlnode = ct.root_hartree.get_url_node_by_uuid(entries[0]['urlnode'])
             if urlnode.body_hash == body_hash:
@@ -962,7 +962,7 @@ class Lookyloo():
                 body_content = urlnode.body
             else:
                 # The hash is an embedded resource
-                for mimetype, blobs in urlnode.body_hash.embedded_ressources.items():
+                for _, blobs in urlnode.body_hash.embedded_ressources.items():
                     for h, b in blobs:
                         if h == body_hash:
                             body_content = b
diff --git a/lookyloo/modules.py b/lookyloo/modules.py
index b7db6ea1..5f421593 100644
--- a/lookyloo/modules.py
+++ b/lookyloo/modules.py
@@ -363,7 +363,7 @@ class PhishingInitiative():
         if not force and pi_file.exists():
             return
 
-        for i in range(3):
+        for _ in range(3):
             url_information = self.client.lookup(url)
             if not url_information['results']:
                 # No results, that should not happen (?)
@@ -457,7 +457,7 @@ class VirusTotal():
         if not force and vt_file.exists():
             return
 
-        for i in range(3):
+        for _ in range(3):
             try:
                 url_information = self.client.get_object(f"/urls/{url_id}")
                 with vt_file.open('w') as _f:
diff --git a/poetry.lock b/poetry.lock
index 1e3eadfd..7468b04a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1122,7 +1122,7 @@ types-MarkupSafe = "*"
 
 [[package]]
 name = "types-markupsafe"
-version = "1.1.1"
+version = "1.1.2"
 description = "Typing stubs for MarkupSafe"
 category = "dev"
 optional = false
@@ -1979,7 +1979,7 @@ types-jinja2 = [
     {file = "types_Jinja2-2.11.1-py2.py3-none-any.whl", hash = "sha256:84f66a2612376ff3c1656198b10a04c7711b6d917d4f5f7f2c2f7ecec0afa040"},
 ]
 types-markupsafe = [
-    {file = "types_MarkupSafe-1.1.1-py2.py3-none-any.whl", hash = "sha256:a2c32269a26b4a7205f6f1581bd37cfbcd390297352b828a9643978392239516"},
+    {file = "types_MarkupSafe-1.1.2-py2.py3-none-any.whl", hash = "sha256:b5e311cb6aad7f6da0bb1455494305e2bb7941b04c3c8cf9ed7bbd33cf8ba374"},
 ]
 types-pkg-resources = [
     {file = "types_pkg_resources-0.1.2-py2.py3-none-any.whl", hash = "sha256:42d640500de564f1ccc21f918117afadf78039e4fa7f513c647ccf742d609aeb"},
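
The `capturecache.py` hunk above swaps a `@dataclass` holding a class-level key tuple for a plain class that declares `__slots__`. Below is a minimal standalone sketch of that pattern; `SlottedEntry` and its fields are illustrative, not the Lookyloo API:

```python
from typing import Any, Dict


class SlottedEntry():
    # __slots__ removes the per-instance __dict__: attribute storage is fixed,
    # each object is smaller, and a typo such as `self.titel = ...` raises
    # AttributeError instead of silently creating a new attribute.
    __slots__ = ('uuid', 'title')

    def __init__(self, entry: Dict[str, Any]):
        required = ('uuid', 'title')
        missing = set(required) - set(entry.keys())
        if missing:
            raise ValueError(f'Missing keys ({missing}).')
        self.uuid: str = entry['uuid']
        self.title: str = entry['title']


entry = SlottedEntry({'uuid': 'abc-123', 'title': 'example'})
```

Class attributes can still coexist with `__slots__` as long as the names do not collide with a slot, so moving `__default_cache_keys` into `__init__` is a simplification rather than a requirement: as a local it no longer needs the name-mangled `self.` lookup.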
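Two `lookyloo.py` hunks stop reading `e.message`: `BaseException` has no `message` attribute on Python 3, so it only exists if the exception class sets it itself. `str(e)` always works, and passing the exception object along (as in the `@@ -191` hunk) stringifies the same way. A small sketch; `Har2TreeErrorDemo` is a stand-in under the assumption of a plain `Exception` subclass:

```python
class Har2TreeErrorDemo(Exception):
    """Stand-in for an exception without a custom `message` attribute."""


try:
    raise Har2TreeErrorDemo('no HAR files found')
except Har2TreeErrorDemo as e:
    detail = str(e)               # -> 'no HAR files found', always available
    # detail = e.message          # AttributeError on a plain Exception
    wrapped = ValueError(e)       # str(wrapped) is also 'no HAR files found'
```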
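The new `sorted_capture_cache` signature applies the standard fix for mutable default arguments: a default is evaluated once, when the function is defined, so a literal `[]` is a single list shared across every call that relies on it. The usual idiom, sketched with a hypothetical function:

```python
from typing import Iterable, List, Optional


def sorted_uuids(uuids: Optional[Iterable[str]] = None) -> List[str]:
    # `None` acts as the sentinel; a fresh list is built per call rather
    # than reusing the one object created at definition time.
    if uuids is None:
        uuids = []
    return sorted(uuids)
```

In the patched method the existing `if not capture_uuids:` branch already treats `None` and an empty iterable alike, so only the annotation and default needed to change.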
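Finally, the `modules.py` hunks rename the unused counter of a bounded retry loop to `_`, the conventional throwaway name. A generic sketch of that retry shape, with a caller-supplied `fetch` standing in for the PhishingInitiative/VirusTotal client calls:

```python
import time
from typing import Any, Callable, Dict


def lookup_with_retries(fetch: Callable[[str], Dict[str, Any]], url: str,
                        attempts: int = 3) -> Dict[str, Any]:
    last_error: Exception = RuntimeError(f'{url}: no attempt made')
    # Only the number of tries matters, so the loop index is bound to `_`.
    for _ in range(attempts):
        try:
            return fetch(url)
        except ConnectionError as e:
            last_error = e
            time.sleep(1)  # crude fixed back-off between attempts
    raise last_error
```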