diff --git a/bin/lib/Config_DB.py b/bin/lib/Config_DB.py
new file mode 100755
index 00000000..67e106ab
--- /dev/null
+++ b/bin/lib/Config_DB.py
@@ -0,0 +1,197 @@
+#!/usr/bin/python3
+
+"""
+Config save in DB
+===================
+
+Global settings stored in the ARDB database, with their default values,
+expected types and documentation.
+"""
+
+import os
+import sys
+import redis
+
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
+import ConfigLoader
+
+config_loader = ConfigLoader.ConfigLoader()
+r_serv_db = config_loader.get_redis_conn("ARDB_DB")
+config_loader = None
+
+#### TO PUT IN CONFIG
+# later => module timeout
+#
+## data retention
+#########################
+
+default_config = {
+    "crawler": {
+        "enable_har_by_default": False,
+        "enable_screenshot_by_default": True,
+        "default_depth_limit": 1,
+        "default_closespider_pagecount": 50,
+        "default_user_agent": "Mozilla/5.0 (Windows NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0",
+        "default_timeout": 30
+    }
+}
+
+def get_default_config():
+    """Return the default values, keyed by section then by field."""
+    return default_config
+
+def get_default_config_value(section, field):
+    """Return the default value of a field (KeyError if it is unknown)."""
+    return default_config[section][field]
+
+config_type = {
+    # crawler config
+    "crawler": {
+        "enable_har_by_default": bool,
+        "enable_screenshot_by_default": bool,
+        "default_depth_limit": int,
+        "default_closespider_pagecount": int,
+        "default_user_agent": str,
+        "default_timeout": int
+    }
+}
+
+def get_config_type(section, field):
+    """Return the Python type declared for a field (KeyError if it is unknown)."""
+    return config_type[section][field]
+
+# # TODO: add set, dict, list and select_(multiple_)value
+def is_valid_type(obj, section, field, value_type=None):
+    """Return True if obj has the type declared for the field.
+
+    value_type is not used yet (see the TODO above).
+    """
+    expected_type = get_config_type(section, field)
+    # bool is a subclass of int: refuse True/False for an int field
+    if isinstance(obj, bool) and expected_type is not bool:
+        return False
+    return isinstance(obj, expected_type)
+
+def _to_db_value(value):
+    """Return the text stored in the DB for a config value."""
+    # recent redis-py versions refuse bool; older ones wrote str(value)
+    return str(value)
+
+def _from_db_value(raw_value, section, field):
+    """Convert a value read from the DB to the type declared for the field."""
+    if isinstance(raw_value, bytes):
+        raw_value = raw_value.decode()
+    field_type = get_config_type(section, field)
+    if field_type is bool:
+        return raw_value == 'True'
+    if field_type is int:
+        try:
+            return int(raw_value)
+        except ValueError:
+            # unreadable entry: use the default value instead
+            return get_default_config_value(section, field)
+    return raw_value
+
+def reset_default_config():
+    """Not implemented yet."""
+    pass
+
+def set_default_config(section, field):
+    """Save the default value of a field in the DB."""
+    save_config(section, field, get_default_config_value(section, field))
+
+def get_all_config_sections():
+    """Return the list of all section names."""
+    return list(get_default_config())
+
+def get_all_config_fields_by_section(section):
+    """Return the list of field names of a section."""
+    return list(get_default_config()[section])
+
+def get_config(section, field):
+    """Return the value of a field, converted to its declared type.
+
+    A field missing from the DB is initialised with its default value.
+    """
+    raw_value = r_serv_db.hget(f'config:global:{section}', field)
+    # config field don't exist (None is also returned if the section is missing)
+    if raw_value is None:
+        set_default_config(section, field)
+        return get_default_config_value(section, field)
+    return _from_db_value(raw_value, section, field)
+
+def get_config_dict_by_section(section):
+    """Return a {field: value} dict for every field of a section."""
+    return {field: get_config(section, field)
+            for field in get_all_config_fields_by_section(section)}
+
+def save_config(section, field, value, value_type=None):
+    """Save a field value in the DB.
+
+    Return True if the value was saved, False if the section or field is
+    unknown, the value has the wrong type or value_type is not supported yet.
+    """
+    if field not in default_config.get(section, {}):
+        return False
+    if not is_valid_type(value, section, field, value_type=value_type):
+        return False
+    # list, set and dict values are not supported yet
+    if value_type in ['list', 'set', 'dict']:
+        return False
+    r_serv_db.hset(f'config:global:{section}', field, _to_db_value(value))
+    # used by check_integrity: register the section, not the field and value
+    r_serv_db.sadd('config:all_global_section', section)
+    return True
+
+# check config value + type
+def check_integrity():
+    """Not implemented yet."""
+    pass
+
+
+config_documentation = {
+    "crawler": {
+        "enable_har_by_default": 'Enable HAR by default',
+        "enable_screenshot_by_default": 'Enable screenshot by default',
+        "default_depth_limit": 'Maximum number of url depth',
+        "default_closespider_pagecount": 'Maximum number of pages',
+        "default_user_agent": "User agent used by default",
+        "default_timeout": "Crawler connection timeout"
+    }
+}
+
+def get_config_documentation(section, field):
+    """Return the description of a field (KeyError if it is unknown)."""
+    return config_documentation[section][field]
+
+# def conf_view():
+#     class F(MyBaseForm):
+#         pass
+#
+#     F.username = TextField('username')
+#     for name in iterate_some_model_dynamically():
+#         setattr(F, name, TextField(name.title()))
+#
+#     form = F(request.POST, ...)
+
+def get_field_full_config(section, field):
+    """Return the value, type and description of a field as a dict."""
+    return {
+        'value': get_config(section, field),
+        'type': get_config_type(section, field),
+        'info': get_config_documentation(section, field),
+    }
+
+def get_full_config_by_section(section):
+    """Return a {field: full config dict} dict for a section."""
+    return {field: get_field_full_config(section, field)
+            for field in get_all_config_fields_by_section(section)}
+
+def get_full_config():
+    """Return a {section: {field: full config dict}} dict for all sections."""
+    return {section: get_full_config_by_section(section)
+            for section in get_all_config_sections()}
+
+if __name__ == '__main__':
+    res = get_full_config()
+    print(res)
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index 8bc9c1b1..7e7cac39 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -26,6 +26,7 @@ import Tag
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
 import Domain
 import crawlers
+import Config_DB
 
 r_cache = Flask_config.r_cache
 r_serv_db = Flask_config.r_serv_db
@@ -438,10 +439,12 @@ def crawler_splash_setings():
     splash_manager_url = crawlers.get_splash_manager_url()
     api_key = crawlers.get_hidden_splash_api_key()
     is_manager_connected = crawlers.ping_splash_manager()
+    crawler_full_config = Config_DB.get_full_config_by_section('crawler')
 
     return render_template("settings_splash_crawler.html",
                                 is_manager_connected=is_manager_connected,
                                 splash_manager_url=splash_manager_url, api_key=api_key,
-                                all_splash=all_splash, all_proxies=all_proxies)
+                                all_splash=all_splash, all_proxies=all_proxies,
+                                crawler_full_config=crawler_full_config)
 
 ## - - ##
diff --git a/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html b/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html
index c1f8204a..9c66211d 100644
--- a/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html
+++ 
b/var/www/templates/crawler/crawler_splash/settings_splash_crawler.html @@ -102,6 +102,7 @@ Description + {% for splash_name in all_splash %} @@ -123,6 +124,13 @@ {{all_splash[splash_name]['description']}} + +
+ +
+ {% endfor %} @@ -153,6 +161,7 @@ Description + {% for proxy_name in all_proxies %} @@ -180,6 +189,13 @@ {{all_proxies[proxy_name]['description']}} + +
+ +
+ {% endfor %} @@ -196,6 +212,43 @@
+ + + + + + + + + {% for config_field in crawler_full_config %} + + + + + + + {% endfor %} + +
+ Key + + Description + + Value +
+ {{config_field}} + + {{crawler_full_config[config_field]['info']}} + + {{crawler_full_config[config_field]['value']}} + +
+ +
+
+