mirror of https://github.com/CIRCL/lookyloo
parent 2f1a0f5da8
commit 99c939fd7f
@@ -47,7 +47,7 @@ class Lookyloo():
         self.taxonomies = get_taxonomies()

         self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
-        self.scrape_dir: Path = get_homedir() / 'scraped'
+        self.capture_dir: Path = get_homedir() / 'scraped'
         if os.environ.get('SPLASH_URL_DOCKER'):
             # In order to have a working default for the docker image, it is easier to use an environment variable
             self.splash_url: str = os.environ['SPLASH_URL_DOCKER']
@@ -55,7 +55,7 @@ class Lookyloo():
         self.splash_url = get_config('generic', 'splash_url')
         self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')

-        safe_create_dir(self.scrape_dir)
+        safe_create_dir(self.capture_dir)

         # Initialize 3rd party components
         self.pi = PhishingInitiative(get_config('modules', 'PhishingInitiative'))
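The two constructor hunks above only rename self.scrape_dir to self.capture_dir; the directory on disk is still get_homedir() / 'scraped', and the Splash URL still comes from the SPLASH_URL_DOCKER environment variable when set, with the config file as fallback. A minimal standalone sketch of that resolution order (the helper names and the default URL below are placeholders, not Lookyloo's own get_config / get_homedir):

import os
from pathlib import Path


def resolve_splash_url(config_value: str = 'http://127.0.0.1:8050') -> str:
    # The docker image exports SPLASH_URL_DOCKER; when present it overrides the config value.
    return os.environ.get('SPLASH_URL_DOCKER') or config_value


def resolve_capture_dir(homedir: Path) -> Path:
    # Renamed attribute, same on-disk location: captures still live under <homedir>/scraped.
    capture_dir = homedir / 'scraped'
    capture_dir.mkdir(parents=True, exist_ok=True)
    return capture_dir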
@@ -504,15 +504,15 @@ class Lookyloo():

     @property
     def capture_dirs(self) -> List[Path]:
-        for capture_dir in self.scrape_dir.iterdir():
+        for capture_dir in self.capture_dir.iterdir():
             if capture_dir.is_dir() and not capture_dir.iterdir():
-                # Cleanup self.scrape_dir of failed runs.
+                # Cleanup self.capture_dir of failed runs.
                 capture_dir.rmdir()
             if not (capture_dir / 'uuid').exists():
                 # Create uuid if missing
                 with (capture_dir / 'uuid').open('w') as f:
                     f.write(str(uuid4()))
-        return sorted(self.scrape_dir.iterdir(), reverse=True)
+        return sorted(self.capture_dir.iterdir(), reverse=True)

     def lookup_capture_dir(self, capture_uuid: str) -> Union[Path, None]:
         capture_dir: str = self.redis.hget('lookup_dirs', capture_uuid)  # type: ignore
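capture_dirs keeps its maintenance logic and only switches to the renamed attribute: empty directories left behind by failed runs are removed, and a uuid file is created for any capture directory that lacks one before the sorted listing is returned. A hedged standalone sketch of that pattern over an arbitrary capture directory:

from pathlib import Path
from typing import List
from uuid import uuid4


def list_capture_dirs(capture_dir: Path) -> List[Path]:
    for entry in capture_dir.iterdir():
        if not entry.is_dir():
            continue
        if not any(entry.iterdir()):
            # A failed run left an empty directory behind: clean it up.
            entry.rmdir()
        elif not (entry / 'uuid').exists():
            # Backfill the uuid file for captures that predate it.
            (entry / 'uuid').write_text(str(uuid4()))
    # Newest first, since the directory names are ISO timestamps.
    return sorted(capture_dir.iterdir(), reverse=True)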
@@ -520,7 +520,7 @@ class Lookyloo():
             return Path(capture_dir)
         return None

-    def enqueue_scrape(self, query: MutableMapping[str, Any]) -> str:
+    def enqueue_capture(self, query: MutableMapping[str, Any]) -> str:
         perma_uuid = str(uuid4())
         p = self.redis.pipeline()
         for key, value in query.items():
@@ -528,19 +528,19 @@ class Lookyloo():
                 # Yes, empty string because that's False.
                 query[key] = 1 if value else ''
         p.hmset(perma_uuid, query)
-        p.sadd('to_scrape', perma_uuid)
+        p.sadd('to_capture', perma_uuid)
         p.execute()
         return perma_uuid

-    def process_scrape_queue(self) -> Union[bool, None]:
-        uuid = self.redis.spop('to_scrape')
+    def process_capture_queue(self) -> Union[bool, None]:
+        uuid = self.redis.spop('to_capture')
         if not uuid:
             return None
-        to_scrape: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)  # type: ignore
+        to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)  # type: ignore
         self.redis.delete(uuid)
-        to_scrape['perma_uuid'] = uuid
-        if self.scrape(**to_scrape):  # type: ignore
-            self.logger.info(f'Processed {to_scrape["url"]}')
+        to_capture['perma_uuid'] = uuid
+        if self.capture(**to_capture):  # type: ignore
+            self.logger.info(f'Processed {to_capture["url"]}')
             return True
         return False

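The queue itself is also renamed: enqueue_capture stores the request as a Redis hash keyed by the permanent UUID (booleans are converted to 1 or '' first, since Redis hashes only hold strings and numbers) and adds that UUID to the to_capture set, previously to_scrape; process_capture_queue pops one UUID, reads the hash back and deletes it before launching the capture. A minimal sketch of that round trip against a local Redis, using hypothetical helpers rather than the class methods themselves:

from uuid import uuid4

from redis import Redis

r = Redis(decode_responses=True)


def enqueue(query: dict) -> str:
    perma_uuid = str(uuid4())
    p = r.pipeline()
    p.hmset(perma_uuid, query)        # one hash per queued request (newer redis-py prefers hset(..., mapping=query))
    p.sadd('to_capture', perma_uuid)  # set of UUIDs waiting to be captured
    p.execute()
    return perma_uuid


def process_one() -> bool:
    uuid = r.spop('to_capture')
    if not uuid:
        return False                  # nothing queued
    query = r.hgetall(uuid)
    r.delete(uuid)                    # the hash is only needed until the capture starts
    print(f"would capture {query.get('url')}")
    return True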
@@ -638,7 +638,7 @@ class Lookyloo():
     def get_capture(self, capture_uuid: str) -> BytesIO:
         return self._get_raw(capture_uuid)

-    def scrape(self, url: str, cookies_pseudofile: Optional[Union[BufferedIOBase, str]]=None,
+    def capture(self, url: str, cookies_pseudofile: Optional[Union[BufferedIOBase, str]]=None,
                depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
                referer: str='', perma_uuid: Optional[str]=None, os: Optional[str]=None,
                browser: Optional[str]=None) -> Union[bool, str]:
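Only the public method name changes; capture() keeps the exact signature the old scrape() had. A hypothetical call, assuming a configured install with Redis and Splash running (the import path and constructor arguments may differ between versions):

# Hypothetical usage sketch; assumes a configured Lookyloo instance,
# a running Redis and a reachable Splash.
from lookyloo.lookyloo import Lookyloo

lookyloo = Lookyloo()
perma_uuid = lookyloo.capture(url='https://www.circl.lu', depth=1, listing=True,
                              user_agent='Mozilla/5.0', referer='')
print(perma_uuid)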
@@ -668,7 +668,7 @@ class Lookyloo():
             ua = user_agent

         if int(depth) > int(get_config('generic', 'max_depth')):
-            self.logger.warning(f'Not allowed to scrape on a depth higher than {get_config("generic", "max_depth")}: {depth}')
+            self.logger.warning(f'Not allowed to capture on a depth higher than {get_config("generic", "max_depth")}: {depth}')
             depth = int(get_config('generic', 'max_depth'))
         items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua,
                       referer=referer, log_enabled=True, log_level=get_config('generic', 'splash_loglevel'))
@@ -678,7 +678,7 @@ class Lookyloo():
         if not perma_uuid:
             perma_uuid = str(uuid4())
         width = len(str(len(items)))
-        dirpath = self.scrape_dir / datetime.now().isoformat()
+        dirpath = self.capture_dir / datetime.now().isoformat()
         safe_create_dir(dirpath)
         for i, item in enumerate(items):
             if not listing:  # Write no_index marker
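Each capture still gets its own directory named after the submission time (datetime.now().isoformat()) under the renamed capture_dir, and width is the zero-padding needed so per-item subdirectories sort in crawl order. A short sketch of both conventions, with the write details omitted:

from datetime import datetime
from pathlib import Path


def new_capture_dir(capture_dir: Path) -> Path:
    # One directory per capture, named after the submission timestamp.
    dirpath = capture_dir / datetime.now().isoformat()
    dirpath.mkdir(parents=True, exist_ok=True)
    return dirpath


def item_prefix(index: int, total: int) -> str:
    # Zero-pad to len(str(total)) digits so listings sort in crawl order,
    # e.g. '03' when there are 12 items in the crawl.
    return f'{index:0{len(str(total))}d}'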
@@ -448,12 +448,12 @@ def rebuild_cache():
 @app.route('/submit', methods=['POST', 'GET'])
 def submit():
     to_query = request.get_json(force=True)
-    perma_uuid = lookyloo.enqueue_scrape(to_query)
+    perma_uuid = lookyloo.enqueue_capture(to_query)
     return Response(perma_uuid, mimetype='text/text')


-@app.route('/scrape', methods=['GET', 'POST'])
-def scrape_web():
+@app.route('/capture', methods=['GET', 'POST'])
+def capture_web():
     if request.form.get('url'):
         # check if the post request has the file part
         if 'cookies' in request.files and request.files['cookies'].filename:
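The /submit endpoint keeps its JSON contract and simply calls the renamed enqueue_capture; the response body is the permanent UUID as plain text. A hypothetical client call, assuming a local web instance (host and port are assumptions, not part of the diff):

import requests  # third-party; pip install requests

# Assumed local instance; adjust host/port for a real deployment.
resp = requests.post('http://127.0.0.1:5100/submit',
                     json={'url': 'https://www.circl.lu', 'depth': 1, 'listing': True})
perma_uuid = resp.text  # plain-text UUID, usable to build the /tree/<uuid> URL
print(perma_uuid)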
@@ -464,11 +464,11 @@ def scrape_web():
         if url:
             depth: int = request.form.get('depth') if request.form.get('depth') else 1  # type: ignore
             listing: bool = request.form.get('listing') if request.form.get('listing') else False  # type: ignore
-            perma_uuid = lookyloo.scrape(url=url, cookies_pseudofile=cookie_file,
-                                         depth=depth, listing=listing,
-                                         user_agent=request.form.get('user_agent'),
-                                         referer=request.form.get('referer'),  # type: ignore
-                                         os=request.form.get('os'), browser=request.form.get('browser'))
+            perma_uuid = lookyloo.capture(url=url, cookies_pseudofile=cookie_file,
+                                          depth=depth, listing=listing,
+                                          user_agent=request.form.get('user_agent'),
+                                          referer=request.form.get('referer'),  # type: ignore
+                                          os=request.form.get('os'), browser=request.form.get('browser'))
             return redirect(url_for('tree', tree_uuid=perma_uuid))
     user_agents: Dict[str, Any] = {}
     if get_config('generic', 'use_user_agents_users'):
@@ -478,7 +478,7 @@ def scrape_web():
     if not user_agents:
         user_agents = get_user_agents()
         user_agents.pop('by_frequency')
-    return render_template('scrape.html', user_agents=user_agents)
+    return render_template('capture.html', user_agents=user_agents)


 @app.route('/cookies/<string:cookie_name>', methods=['GET'])
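On the web side, the form handler moves from /scrape to /capture but keeps the same form fields; on success it redirects to the tree view for the new permanent UUID, and otherwise it renders the renamed capture.html template. The same submission can be scripted with form-encoded data; a hedged sketch against an assumed local instance:

import requests  # third-party; pip install requests

# Field names taken from the handler above; host and port are assumptions.
resp = requests.post('http://127.0.0.1:5100/capture',
                     data={'url': 'https://www.circl.lu', 'depth': 1, 'listing': 'true'},
                     allow_redirects=False)
# On success the handler answers with a redirect to /tree/<perma_uuid>.
print(resp.status_code, resp.headers.get('Location'))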
@@ -1,5 +1,5 @@
 {% extends "main.html" %}
-{% block title %}Scrape{% endblock %}
+{% block title %}Capture{% endblock %}

 {% block content %}
 <div class="container">
@@ -8,7 +8,7 @@
          alt="Lookyloo" width="400">
   </center>
   </br>
-  <form role="form" action="{{ url_for('scrape_web') }}" method=post enctype=multipart/form-data>
+  <form role="form" action="{{ url_for('capture_web') }}" method=post enctype=multipart/form-data>
     <div class="form-group row">
       <div class="col-sm-10">
         <div class="form-check">
@@ -108,5 +108,5 @@

 {% block scripts %}
   {{ super() }}
-  <script src='{{ url_for('static', filename='scrape.js') }}'></script>
+  <script src='{{ url_for('static', filename='capture.js') }}'></script>
 {% endblock %}
@@ -39,13 +39,13 @@ $(document).ready(function () {

 {% block content %}
 <center>
-  <a href="{{ url_for('scrape_web') }}">
+  <a href="{{ url_for('capture_web') }}">
     <img src="{{ url_for('static', filename='lookyloo.jpeg') }}"
          alt="Lookyloo" width="200">
   </a>
 </center>
 <center>
-  <h2><a href="{{ url_for('scrape_web') }}">Start a new capture</a></h2>
+  <h2><a href="{{ url_for('capture_web') }}">Start a new capture</a></h2>
   <br><br>
   {{ render_messages(container=True, dismissible=True) }}
 </center>