diff --git a/Dockerfile b/Dockerfile index 4be2799..348c32e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,18 +4,20 @@ ENV LANG=C.UTF-8 RUN apt-get update RUN apt-get -y upgrade -RUN apt-get -y install git wget python3-pip +RUN apt-get -y install wget python3-pip nodejs git RUN pip3 install pipenv -WORKDIR root_lookyloo - -RUN git clone https://github.com/CIRCL/lookyloo.git WORKDIR lookyloo -RUN sed -i "s/str='http:\/\/127.0.0.1:8050'/str='http:\/\/splash:8050'/g" lookyloo/lookyloo.py -RUN pipenv install -run echo LOOKYLOO_HOME="'`pwd`'" > .env -run nohup pipenv run run_backend.py --start -run nohup pipenv run async_scrape.py -CMD ["pipenv", "run", "start_website.py"] -EXPOSE 5100 +COPY lookyloo lookyloo/ +COPY client client/ +COPY bin bin/ +COPY website website/ +COPY setup.py . +COPY Pipfile . +COPY Pipfile.lock . + +RUN mkdir cache user_agents scraped + +RUN pipenv install +RUN echo LOOKYLOO_HOME="'`pwd`'" > .env diff --git a/bin/start_website.py b/bin/start_website.py index 4681c89..a2ab519 100755 --- a/bin/start_website.py +++ b/bin/start_website.py @@ -17,6 +17,7 @@ if __name__ == '__main__': p = Popen(['gunicorn', '--worker-class', 'eventlet', '-w', '10', '--graceful-timeout', '2', '--timeout', '30', '-b', '0.0.0.0:5100', + '--log-level', 'info', 'web:app'], cwd=website_dir) set_running('website') diff --git a/docker-compose.yml b/docker-compose.yml index 3005f07..f49ff27 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,24 +1,38 @@ version: '3' services: + redis: image: redis + working_dir: /cache + command: ./cache.conf --daemonize no + volumes: + - ./cache:/cache + splash: image: "scrapinghub/splash" - expose: - - "8050" - - "5023" - command: --disable-ui --disable-lua + ports: + - "8050:8050" + - "5023:5023" + command: --disable-lua --disable-private-mode + lookyloo: - build: . - ports: - - "5100:5100" - tty: true - command: - - /bin/sh - - -c - - | - pipenv run run_backend.py --start - pipenv run async_scrape.py & - pipenv run start_website.py - #volumes: - # - /path/to/your/data/directory:/lookyloo/scraped + build: . + working_dir: /lookyloo + tty: true + environment: + - SPLASH_URL=http://splash:8050 + command: + - /bin/sh + - -c + - | + pipenv run async_scrape.py & + pipenv run start_website.py + volumes: + - ./cache:/lookyloo/cache + - ./scraped:/lookyloo/scraped + - ./user_agents:/lookyloo/user_agents + ports: + - "5100:5100" + links: + - "redis" + - "splash" diff --git a/website/web/__init__.py b/website/web/__init__.py index 305cd59..06230e4 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -33,7 +33,10 @@ app.config['SESSION_COOKIE_NAME'] = 'lookyloo' app.debug = False # API entry point for splash -splash_url = 'http://127.0.0.1:8050' +if os.environ.get('SPLASH_URL'): + splash_url = os.environ.get('SPLASH_URL') +else: + splash_url = 'http://127.0.0.1:8050' # Splash log level loglevel = logging.DEBUG # Set it to True if your instance is publicly available so users aren't able to scan your internal network