chg: [crawler] refactor crawler tasks + migrate cookiejars + add proxy option

pull/594/head
Terrtia 2023-02-21 12:22:49 +01:00
parent c04bc7bb57
commit 6842efc15d
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
19 changed files with 568 additions and 788 deletions

View File

@ -131,29 +131,29 @@ Finally, you can quit this program by pressing either ``<q>`` or ``<C-c>``.
Crawler Crawler
--------------------- ---------------------
In AIL, you can crawl Tor hidden services. Don't forget to review the proxy configuration of your Tor client and especially if you enabled the SOCKS5 proxy and binding on the appropriate IP address reachable via the dockers where Splash runs. In AIL, you can crawl websites and Tor hidden services. Don't forget to review the proxy configuration of your Tor client, especially whether the SOCKS5 proxy is enabled.
[//]: # (and binding on the appropriate IP address reachable via the dockers where Splash runs.)
### Installation ### Installation
[Install AIL-Splash-Manager](https://github.com/ail-project/ail-splash-manager) [Install Lacus](https://github.com/ail-project/lacus)
### Configuration ### Configuration
1. Search the Splash-Manager API key. This API key is generated when you launch the manager for the first time. 1. Lacus URL:
(located in your Splash Manager directory ``ail-splash-manager/token_admin.txt``)
2. Splash Manager URL and API Key:
In the webinterface, go to ``Crawlers>Settings`` and click on the Edit button In the webinterface, go to ``Crawlers>Settings`` and click on the Edit button
![Splash Manager Config](./doc/screenshots/splash_manager_config_edit_1.png?raw=true "AIL framework Splash Manager Config")
![Splash Manager Config](./doc/screenshots/splash_manager_config_edit_2.png?raw=true "AIL framework Splash Manager Config") ![Lacus Config](./doc/screenshots/lacus_config.png?raw=true "AIL Lacus Config")
3. Launch AIL Crawlers: ![Lacus Config](./doc/screenshots/lacus_config_edit.png?raw=true "AIL Lacus Config")
2. Launch AIL Crawlers:
Choose the number of crawlers you want to launch Choose the number of crawlers you want to launch
![Splash Manager Nb Crawlers Config](./doc/screenshots/splash_manager_nb_crawlers_1.png?raw=true "AIL framework Nb Crawlers Config")
![Splash Manager Nb Crawlers Config](./doc/screenshots/splash_manager_nb_crawlers_2.png?raw=true "AIL framework Nb Crawlers Config") ![Lacus Nb Crawlers Config](./doc/screenshots/crawler_nb_captures.png?raw=true "AIL Lacus Nb Crawlers Config")
![Lacus Nb Crawlers Config](./doc/screenshots/crawler_nb_captures_edit.png?raw=true "AIL Lacus Nb Crawlers Config")

View File

@ -65,31 +65,29 @@ class Crawler(AbstractModule):
def get_message(self): def get_message(self):
# Check if a new Capture can be Launched # Check if a new Capture can be Launched
if crawlers.get_nb_crawler_captures() < crawlers.get_crawler_max_captures(): if crawlers.get_nb_crawler_captures() < crawlers.get_crawler_max_captures():
task_row = crawlers.get_crawler_task_from_queue() task_row = crawlers.add_task_to_lacus_queue()
if task_row: if task_row:
print(task_row) print(task_row)
task_uuid, priority = task_row task_uuid, priority = task_row
self.enqueue_capture(task_uuid, priority) self.enqueue_capture(task_uuid, priority)
# Check if a Capture is Done # Get CrawlerCapture Object
capture = crawlers.get_crawler_capture() capture = crawlers.get_crawler_capture()
if capture: if capture:
print(capture) print(capture.uuid)
capture_uuid = capture[0][0] status = self.lacus.get_capture_status(capture.uuid)
capture_status = self.lacus.get_capture_status(capture_uuid) if status != crawlers.CaptureStatus.DONE: # TODO ADD GLOBAL TIMEOUT-> Save start time
if capture_status != crawlers.CaptureStatus.DONE: # TODO ADD GLOBAL TIMEOUT-> Save start time capture.update(status)
crawlers.update_crawler_capture(capture_uuid) print(capture.uuid, status, int(time.time()))
print(capture_uuid, capture_status, int(time.time()))
else: else:
self.compute(capture_uuid) self.compute(capture)
crawlers.remove_crawler_capture(capture_uuid) capture.delete() # TODO DELETE TASK ONLY IF NOT SCHEDULED TASKS
print('capture', capture_uuid, 'completed') print('capture', capture.uuid, 'completed')
time.sleep(self.pending_seconds) time.sleep(self.pending_seconds)
def enqueue_capture(self, task_uuid, priority): def enqueue_capture(self, task_uuid, priority):
task = crawlers.get_crawler_task(task_uuid) task = crawlers.CrawlerTask(task_uuid)
print(task) print(task)
# task = { # task = {
# 'uuid': task_uuid, # 'uuid': task_uuid,
@ -104,47 +102,43 @@ class Crawler(AbstractModule):
# 'proxy': 'force_tor', # 'proxy': 'force_tor',
# 'parent': 'manual', # 'parent': 'manual',
# } # }
url = task['url'] url = task.get_url()
force = priority != 0 force = priority != 0
# TODO timeout
# TODO unpack cookiejar
# TODO HEADER # TODO HEADER
capture_uuid = self.lacus.enqueue(url=url, capture_uuid = self.lacus.enqueue(url=url,
depth=task['depth'], depth=task.get_depth(),
user_agent=task['user_agent'], user_agent=task.get_user_agent(),
proxy=task['proxy'], proxy=task.get_proxy(),
cookies=[], cookies=task.get_cookies(),
force=force, force=force,
general_timeout_in_sec=90) general_timeout_in_sec=90)
crawlers.add_crawler_capture(task_uuid, capture_uuid) crawlers.create_capture(capture_uuid, task_uuid)
print(task_uuid, capture_uuid, 'launched') print(task.uuid, capture_uuid, 'launched')
return capture_uuid return capture_uuid
# CRAWL DOMAIN # CRAWL DOMAIN
# TODO: CATCH ERRORS # TODO: CATCH ERRORS
def compute(self, capture_uuid): def compute(self, capture):
print('saving capture', capture.uuid)
print('saving capture', capture_uuid) task = capture.get_task()
domain = task.get_domain()
print(domain)
task_uuid = crawlers.get_crawler_capture_task_uuid(capture_uuid) self.domain = Domain(domain)
task = crawlers.get_crawler_task(task_uuid)
print(task['domain'])
self.domain = Domain(task['domain'])
# TODO CHANGE EPOCH # TODO CHANGE EPOCH
epoch = int(time.time()) epoch = int(time.time())
parent_id = task['parent'] parent_id = task.get_parent()
print(task)
entries = self.lacus.get_capture(capture_uuid) entries = self.lacus.get_capture(capture.uuid)
print(entries['status']) print(entries['status'])
self.har = task['har'] self.har = task.get_har()
self.screenshot = task['screenshot'] self.screenshot = task.get_screenshot()
str_date = crawlers.get_current_date(separator=True) str_date = crawlers.get_current_date(separator=True)
self.har_dir = crawlers.get_date_har_dir(str_date) self.har_dir = crawlers.get_date_har_dir(str_date)
self.items_dir = crawlers.get_date_crawled_items_source(str_date) self.items_dir = crawlers.get_date_crawled_items_source(str_date)
@ -156,14 +150,13 @@ class Crawler(AbstractModule):
self.domain.update_daterange(str_date.replace('/', '')) self.domain.update_daterange(str_date.replace('/', ''))
# Origin + History # Origin + History
if self.root_item: if self.root_item:
# domain.add_ports(port)
self.domain.set_last_origin(parent_id) self.domain.set_last_origin(parent_id)
self.domain.add_history(epoch, root_item=self.root_item) self.domain.add_history(epoch, root_item=self.root_item)
elif self.domain.was_up(): elif self.domain.was_up():
self.domain.add_history(epoch, root_item=epoch) self.domain.add_history(epoch, root_item=epoch)
crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch) crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
crawlers.clear_crawler_task(task_uuid, self.domain.get_domain_type()) task.clear()
def save_capture_response(self, parent_id, entries): def save_capture_response(self, parent_id, entries):
print(entries.keys()) print(entries.keys())
@ -242,14 +235,6 @@ if __name__ == '__main__':
################################## ##################################
################################## ##################################
# from Helper import Process
# from pubsublogger import publisher
# ======== FUNCTIONS ========
# def update_auto_crawler(): # def update_auto_crawler():
# current_epoch = int(time.time()) # current_epoch = int(time.time())
# list_to_crawl = redis_crawler.zrangebyscore('crawler_auto_queue', '-inf', current_epoch) # list_to_crawl = redis_crawler.zrangebyscore('crawler_auto_queue', '-inf', current_epoch)

File diff suppressed because it is too large Load Diff

View File

@ -91,7 +91,7 @@ class Onion(AbstractModule):
if onion_urls: if onion_urls:
if crawlers.is_crawler_activated(): if crawlers.is_crawler_activated():
for domain in domains: # TODO LOAD DEFAULT SCREENSHOT + HAR for domain in domains: # TODO LOAD DEFAULT SCREENSHOT + HAR
task_uuid = crawlers.add_crawler_task(domain, parent=item.get_id()) task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0)
if task_uuid: if task_uuid:
print(f'{domain} added to crawler queue: {task_uuid}') print(f'{domain} added to crawler queue: {task_uuid}')
else: else:

View File

@ -10,8 +10,8 @@ This module spots zerobins-like services for further processing
# Import External packages # Import External packages
################################## ##################################
import os import os
import sys
import re import re
import sys
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
@ -30,7 +30,7 @@ class Zerobins(AbstractModule):
super(Zerobins, self).__init__() super(Zerobins, self).__init__()
binz = [ binz = [
r'^https:\/\/(zerobin||privatebin)\..*$', # historical ones r'^https:\/\/(zerobin||privatebin)\..*$', # historical ones
] ]
self.regex = re.compile('|'.join(binz)) self.regex = re.compile('|'.join(binz))
@ -59,13 +59,13 @@ class Zerobins(AbstractModule):
if len(matching_binz) > 0: if len(matching_binz) > 0:
for bin_url in matching_binz: for bin_url in matching_binz:
print(f'send {bin_url} to crawler') print(f'send {bin_url} to crawler')
crawlers.add_crawler_task(bin_url, depth=0, har=False, screenshot=False, proxy='force_tor', # TODO Change priority ???
parent='manual', priority=10) crawlers.create_task(bin_url, depth=0, har=False, screenshot=False, proxy='force_tor',
parent='manual', priority=60)
self.redis_logger.debug("Compute message in queue") self.redis_logger.debug("Compute message in queue")
# TODO TEST ME
if __name__ == '__main__': if __name__ == '__main__':
module = Zerobins() module = Zerobins()
module.run() module.run()

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

View File

@ -60,7 +60,7 @@ def create_json_response(data, status_code):
@login_read_only @login_read_only
def crawlers_dashboard(): def crawlers_dashboard():
is_manager_connected = crawlers.get_lacus_connection_metadata() is_manager_connected = crawlers.get_lacus_connection_metadata()
crawlers_status = crawlers.get_crawler_capture_status() crawlers_status = crawlers.get_captures_status()
print(crawlers_status) print(crawlers_status)
crawlers_latest_stats = crawlers.get_crawlers_stats() crawlers_latest_stats = crawlers.get_crawlers_stats()
print(crawlers_latest_stats) print(crawlers_latest_stats)
@ -75,7 +75,7 @@ def crawlers_dashboard():
@login_required @login_required
@login_read_only @login_read_only
def crawler_dashboard_json(): def crawler_dashboard_json():
crawlers_status = crawlers.get_crawler_capture_status() crawlers_status = crawlers.get_captures_status()
crawlers_latest_stats = crawlers.get_crawlers_stats() crawlers_latest_stats = crawlers.get_crawlers_stats()
return jsonify({'crawlers_status': crawlers_status, return jsonify({'crawlers_status': crawlers_status,
@ -106,7 +106,6 @@ def send_to_spider():
# POST val # POST val
url = request.form.get('url_to_crawl') url = request.form.get('url_to_crawl')
crawler_type = request.form.get('crawler_queue_type') crawler_type = request.form.get('crawler_queue_type')
proxy = request.form.get('proxy_name')
auto_crawler = request.form.get('crawler_type') # TODO Auto Crawler auto_crawler = request.form.get('crawler_type') # TODO Auto Crawler
crawler_delta = request.form.get('crawler_epoch') # TODO Auto Crawler crawler_delta = request.form.get('crawler_epoch') # TODO Auto Crawler
screenshot = request.form.get('screenshot') screenshot = request.form.get('screenshot')
@ -114,7 +113,13 @@ def send_to_spider():
depth_limit = request.form.get('depth_limit') depth_limit = request.form.get('depth_limit')
cookiejar_uuid = request.form.get('cookiejar') cookiejar_uuid = request.form.get('cookiejar')
if crawler_type == 'onion': # PROXY
proxy = request.form.get('proxy_name')
if proxy:
res = crawlers.api_verify_proxy(proxy)
if res[1] != 200:
return create_json_response(res[0], res[1])
elif crawler_type == 'onion':
proxy = 'force_tor' proxy = 'force_tor'
if cookiejar_uuid: if cookiejar_uuid:
@ -129,6 +134,7 @@ def send_to_spider():
data['proxy'] = proxy data['proxy'] = proxy
if cookiejar_uuid: if cookiejar_uuid:
data['cookiejar'] = cookiejar_uuid data['cookiejar'] = cookiejar_uuid
# print(data)
res = crawlers.api_add_crawler_task(data, user_id=user_id) res = crawlers.api_add_crawler_task(data, user_id=user_id)
if res[1] != 200: if res[1] != 200:
@ -655,36 +661,6 @@ def crawler_cookiejar_cookie_json_add_post():
# --- Cookiejar ---# # --- Cookiejar ---#
@crawler_splash.route('/crawler/settings/crawlers_to_lauch', methods=['GET', 'POST'])
@login_required
@login_admin
def crawler_splash_setings_crawlers_to_lauch():
if request.method == 'POST':
dict_splash_name = {}
for crawler_name in list(request.form):
dict_splash_name[crawler_name] = request.form.get(crawler_name)
res = crawlers.api_set_nb_crawlers_to_launch(dict_splash_name)
if res[1] != 200:
return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
else:
return redirect(url_for('crawler_splash.crawler_splash_setings'))
else:
nb_crawlers_to_launch = crawlers.get_nb_crawlers_to_launch_ui()
return render_template("settings_edit_crawlers_to_launch.html",
nb_crawlers_to_launch=nb_crawlers_to_launch)
@crawler_splash.route('/crawler/settings/relaunch_crawler', methods=['GET'])
@login_required
@login_admin
def crawler_splash_setings_relaunch_crawler():
crawlers.relaunch_crawlers()
return redirect(url_for('crawler_splash.crawler_splash_setings'))
## - - ##
#### LACUS #### #### LACUS ####
@crawler_splash.route('/crawler/settings', methods=['GET']) @crawler_splash.route('/crawler/settings', methods=['GET'])
@ -693,6 +669,7 @@ def crawler_splash_setings_relaunch_crawler():
def crawler_settings(): def crawler_settings():
lacus_url = crawlers.get_lacus_url() lacus_url = crawlers.get_lacus_url()
api_key = crawlers.get_hidden_lacus_api_key() api_key = crawlers.get_hidden_lacus_api_key()
nb_captures = crawlers.get_crawler_max_captures()
is_manager_connected = crawlers.get_lacus_connection_metadata(force_ping=True) is_manager_connected = crawlers.get_lacus_connection_metadata(force_ping=True)
is_crawler_working = crawlers.is_test_ail_crawlers_successful() is_crawler_working = crawlers.is_test_ail_crawlers_successful()
@ -701,14 +678,13 @@ def crawler_settings():
# TODO REGISTER PROXY # TODO REGISTER PROXY
# all_proxies = crawlers.get_all_proxies_metadata() # all_proxies = crawlers.get_all_proxies_metadata()
# nb_crawlers_to_launch = crawlers.get_nb_crawlers_to_launch()
# crawler_full_config = Config_DB.get_full_config_by_section('crawler') # crawler_full_config = Config_DB.get_full_config_by_section('crawler')
return render_template("settings_crawler.html", return render_template("settings_crawler.html",
is_manager_connected=is_manager_connected, is_manager_connected=is_manager_connected,
lacus_url=lacus_url, api_key=api_key, lacus_url=lacus_url, api_key=api_key,
nb_captures=nb_captures,
# all_proxies=all_proxies, # all_proxies=all_proxies,
# nb_crawlers_to_launch=nb_crawlers_to_launch,
is_crawler_working=is_crawler_working, is_crawler_working=is_crawler_working,
crawler_error_mess=crawler_error_mess, crawler_error_mess=crawler_error_mess,
) )
@ -733,6 +709,22 @@ def crawler_lacus_settings_crawler_manager():
api_key = crawlers.get_lacus_api_key() api_key = crawlers.get_lacus_api_key()
return render_template("settings_edit_lacus_crawler.html", lacus_url=lacus_url, api_key=api_key) return render_template("settings_edit_lacus_crawler.html", lacus_url=lacus_url, api_key=api_key)
@crawler_splash.route('/crawler/settings/crawlers_to_launch', methods=['GET', 'POST'])
@login_required
@login_admin
def crawler_settings_crawlers_to_launch():
    """Display or update the crawler "max captures" setting.

    GET: render ``settings_edit_crawlers_to_launch.html`` with the value
    returned by ``crawlers.get_crawler_max_captures()``.

    POST: read ``nb_captures`` from the submitted form and pass it to
    ``crawlers.api_set_crawler_max_captures``. A result whose status code
    is not 200 is returned through ``create_json_response``; otherwise the
    client is redirected to the ``crawler_splash.crawler_settings`` endpoint.
    """
    if request.method != 'POST':
        # Any non-POST request (i.e. GET) only shows the edit form.
        current_nb = crawlers.get_crawler_max_captures()
        return render_template("settings_edit_crawlers_to_launch.html",
                               nb_captures=current_nb)

    # POST: forward the raw form value; the API call reports the outcome
    # as a (payload, status_code) style result.
    submitted_nb = request.form.get('nb_captures')
    api_result = crawlers.api_set_crawler_max_captures({'nb': submitted_nb})
    status_code = api_result[1]
    if status_code == 200:
        return redirect(url_for('crawler_splash.crawler_settings'))
    return create_json_response(api_result[0], status_code)
@crawler_splash.route('/crawler/settings/crawler/test', methods=['GET']) @crawler_splash.route('/crawler/settings/crawler/test', methods=['GET'])
@login_required @login_required

View File

@ -1,31 +1,31 @@
{%if not is_manager_connected['status']%} {%if not is_manager_connected['status']%}
<div class="alert alert-secondary text-center my-2" role="alert"> <div class="alert alert-secondary text-center my-2" role="alert">
<h1><i class="fas fa-times-circle text-danger"></i> Crawler Disabled</h1> <h1><i class="fas fa-times-circle text-danger"></i> Crawler Disabled</h1>
<p> <p>
{%if 'error' in is_manager_connected%} {%if 'error' in is_manager_connected%}
<b>{{is_manager_connected['status_code']}}</b> <b>{{is_manager_connected['status_code']}}</b>
<br> <br>
<b>Error:</b> {{is_manager_connected['error']}} <b>Error:</b> {{is_manager_connected['error']}}
{%else%} {%else%}
<b>Error:</b> core/Crawler_manager not launched <b>Error:</b> Lacus not connected
{%endif%} {%endif%}
</p> </p>
<div style="max-width: 500px;"> <div style="max-width: 500px;">
<ul class="list-group my-3"> <ul class="list-group my-3">
<li class="list-group-item bg-dark text-white">Splash Manager Features:</li> <li class="list-group-item bg-dark text-white"><h3>Lacus Features:</h3></li>
<li class="list-group-item">Install and run Splash crawlers on another server</li> <li class="list-group-item">Install and run crawlers on another server</li>
<li class="list-group-item">Handle proxies (Web and tor)</li> <li class="list-group-item">Handle proxies ( <i class="fab fa-html5"></i> Web and <i class="fas fa-user-secret"></i> tor)</li>
<li class="list-group-item">Launch/Kill Splash Dockers</li> <li class="list-group-item">Multiple Concurrent Captures</li>
<li class="list-group-item">Restart crawlers on crash</li> <li class="list-group-item">HOW TO</li>
<li class="list-group-item"> <li class="list-group-item">
<div class="d-flex justify-content-center"> <div class="d-flex justify-content-center">
<a class="btn btn-info" href="https://github.com/ail-project/ail-splash-manager" role="button"> <a class="btn btn-info" href="https://github.com/ail-project/lacus" role="button">
<i class="fab fa-github"></i> Install and Configure AIL-Splash-Manager <i class="fab fa-github"></i> Install and Configure Lacus
</a> </a>
</div> </div>
</li> </li>
</ul> </ul>
</div>
</div> </div>
</div>
{%endif%} {%endif%}

View File

@ -60,13 +60,8 @@
{%endfor%} {%endfor%}
</select> </select>
</div> </div>
<div id="div_proxy_name"> <div class="input-group" id="div_proxy_name">
<select class="custom-select form-control" name="proxy_name" id="proxy_name"> <input type="text" class="form-control" id="proxy_name" name="proxy_name" placeholder="Expected Format: [scheme]://[username]:[password]@[hostname]:[port]">
<option value="None" selected>Use a proxy</option>
{%for proxy in proxies%}
<option value="{{proxy}}">{{proxy}}</option>
{%endfor%}
</select>
</div> </div>
<div class="d-flex mt-3"> <div class="d-flex mt-3">
<i class="fas fa-user-ninja mt-1"></i> &nbsp;Manual&nbsp;&nbsp; <i class="fas fa-user-ninja mt-1"></i> &nbsp;Manual&nbsp;&nbsp;

View File

@ -33,7 +33,7 @@
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Edit Cookie: {{cookie_uuid}}</h5> <h5 class="card-title"><i class="fas fa-cookie-bite"></i> Edit Cookie: {{cookie_uuid}}</h5>
</div> </div>
<div class="col-4"> <div class="col-4">
<a class="btn btn-danger float-right" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_delete') }}?cookiejar_uuid={{cookiejar_uuid}}&cookie_uuid={{cookie_uuid}}"> <a class="btn btn-danger float-right" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_delete') }}?uuid={{cookie_uuid}}">
<i class="fas fa-trash-alt"></i> <i class="fas fa-trash-alt"></i>
</a> </a>
</div> </div>

View File

@ -53,7 +53,7 @@
</div> </div>
{% endif %} {% endif %}
</span> </span>
<h4>Splash Crawler Manager</h4> <h4>AIL Lacus Crawler</h4>
</div> </div>
<div class="card-body"> <div class="card-body">
@ -92,52 +92,43 @@
</div> </div>
</div> </div>
<div class="card border-secondary"> {# <div class="card border-secondary">#}
<div class="card-body text-dark"> {# <div class="card-body text-dark">#}
<h5 class="card-title">All Proxies:</h5> {# <h5 class="card-title">All Proxies:</h5>#}
<table class="table table-striped"> {# <table class="table table-striped">#}
<thead class="bg-info text-white"> {# <thead class="bg-info text-white">#}
<tr> {# <tr>#}
<th>Proxy name</th> {# <th>Proxy name</th>#}
<th>URL</th> {# <th>URL</th>#}
<th>Crawler Type</th> {# <th>Description</th>#}
<th>Description</th> {# <th></th>#}
<th></th> {# </tr>#}
</tr> {# </thead>#}
</thead> {# <tbody>#}
<tbody> {# {% for proxy_name in all_proxies %}#}
{% for proxy_name in all_proxies %} {# <tr>#}
<tr> {# <td>#}
<td> {# {{proxy_name}}#}
{{proxy_name}} {# </td>#}
</td> {# <td>#}
<td> {# {{all_proxies[proxy_name]['url']}}#}
{{all_proxies[proxy_name]['url']}} {# </td>#}
</td> {# <td>#}
<td> {# {{all_proxies[proxy_name]['description']}}#}
{%if all_proxies[proxy_name]['crawler_type']=='tor'%} {# </td>#}
<i class="fas fa-user-secret"></i> {# <td>#}
{%else%} {# <div class="d-flex justify-content-end">#}
<i class="fab fa-html5"></i> {# <!-- <button class="btn btn-outline-dark px-1 py-0">#}
{%endif%} {# <i class="fas fa-pencil-alt"></i>#}
{{all_proxies[proxy_name]['crawler_type']}} {# </button> -->#}
</td> {# </div>#}
<td> {# </td>#}
{{all_proxies[proxy_name]['description']}} {# </tr>#}
</td> {# {% endfor %}#}
<td> {# </tbody>#}
<div class="d-flex justify-content-end"> {# </table>#}
<!-- <button class="btn btn-outline-dark px-1 py-0"> {# </div>#}
<i class="fas fa-pencil-alt"></i> {# </div>#}
</button> -->
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div> </div>
</div> </div>
@ -176,25 +167,12 @@
<div class="card border-secondary my-4"> <div class="card border-secondary my-4">
<div class="card-body text-dark"> <div class="card-body text-dark">
<h5 class="card-title">Number of Crawlers to Launch:</h5> <h5 class="card-title">
<table class="table table-sm"> Number of Concurrent Crawlers to Launch: &nbsp;&nbsp;<b class="text-primary">{{ nb_captures }}</b>
<tbody> </h5>
{%for crawler in nb_crawlers_to_launch%} <a href="{{ url_for('crawler_splash.crawler_settings_crawlers_to_launch') }}">
<tr>
<td>{{crawler}}</td>
<td>{{nb_crawlers_to_launch[crawler]}}</td>
</tr>
{%endfor%}
</tbody>
</table>
<a href="{{ url_for('crawler_splash.crawler_splash_setings_crawlers_to_lauch') }}">
<button type="button" class="btn btn-info"> <button type="button" class="btn btn-info">
Edit number of crawlers to launch <i class="fas fa-pencil-alt"></i> Edit <i class="fas fa-pencil-alt"></i>
</button>
</a>
<a href="{{ url_for('crawler_splash.crawler_splash_setings_relaunch_crawler') }}">
<button type="button" class="btn btn-danger">
ReLaunch Crawlers <i class="fas fa-redo"></i>
</button> </button>
</a> </a>
</div> </div>

View File

@ -26,22 +26,17 @@
<div class="col-12 col-lg-10" id="core_content"> <div class="col-12 col-lg-10" id="core_content">
<form action="{{ url_for('crawler_splash.crawler_splash_setings_crawlers_to_lauch') }}" method="post" enctype="multipart/form-data"> <div class="card my-2">
<h5 class="card-title">Number of Crawlers to Launch:</h5> <div class="card-header bg-dark text-white">
<table class="table table-sm">
<tbody> <form action="{{ url_for('crawler_splash.crawler_settings_crawlers_to_launch') }}" method="post" enctype="multipart/form-data">
{%for crawler_name in nb_crawlers_to_launch%} <h3 class="card-title">Number of Concurrent Crawlers to Launch:</h3>
<tr> <input class="form-control" type="number" id="nb_captures" value="{{ nb_captures }}" min="1" name="nb_captures" required>
<td>{{crawler_name}}</td>
<td> <button type="submit" class="btn btn-primary my-2">Edit <i class="fas fa-pencil-alt"></i></button>
<input class="form-control" type="number" id="{{crawler_name}}" value="{{nb_crawlers_to_launch[crawler_name]}}" min="0" name="{{crawler_name}}" required> </form>
</td> </div>
</tr> </div>
{%endfor%}
</tbody>
</table>
<button type="submit" class="btn btn-primary">Edit <i class="fas fa-pencil-alt"></i></button>
</form>
</div> </div>
</div> </div>