diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 3b17a4a6..684af83b 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -201,28 +201,32 @@ function launching_scripts { } function launching_crawler { - CONFIG=$AIL_BIN/packages/config.cfg - lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_onion_port/{print $3;exit}' "${CONFIG}") + if [[ ! $iscrawler ]]; then + CONFIG=$AIL_BIN/packages/config.cfg + lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_onion_port/{print $3;exit}' "${CONFIG}") - IFS='-' read -ra PORTS <<< "$lport" - if [ ${#PORTS[@]} -eq 1 ] - then - first_port=${PORTS[0]} - last_port=${PORTS[0]} - else - first_port=${PORTS[0]} - last_port=${PORTS[1]} - fi + IFS='-' read -ra PORTS <<< "$lport" + if [ ${#PORTS[@]} -eq 1 ] + then + first_port=${PORTS[0]} + last_port=${PORTS[0]} + else + first_port=${PORTS[0]} + last_port=${PORTS[1]} + fi - screen -dmS "Crawler_AIL" - sleep 0.1 - - for ((i=first_port;i<=last_port;i++)); do - screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c 'cd '${AIL_BIN}'; ./Crawler.py onion '$i'; read x' + screen -dmS "Crawler_AIL" sleep 0.1 - done - echo -e $GREEN"\t* Launching Crawler_AIL scripts"$DEFAULT + for ((i=first_port;i<=last_port;i++)); do + screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c 'cd '${AIL_BIN}'; ./Crawler.py onion '$i'; read x' + sleep 0.1 + done + + echo -e $GREEN"\t* Launching Crawler_AIL scripts"$DEFAULT + else + echo -e $RED"\t* A screen is already launched"$DEFAULT + fi } function shutting_down_redis { diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 59060ba3..47486dd9 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -114,7 +114,7 @@ class TorSplashCrawler(): # down ? print('504 detected') elif response.status != 200: - #print('other: {}'.format(response.status)) + print('other response: {}'.format(response.status)) #print(error_log) #detect connection to proxy refused error_log = (json.loads(response.body.decode())) diff --git a/bin/torcrawler/launch_splash_crawler.sh b/bin/torcrawler/launch_splash_crawler.sh index 5f3f9020..412022c1 100755 --- a/bin/torcrawler/launch_splash_crawler.sh +++ b/bin/torcrawler/launch_splash_crawler.sh @@ -37,7 +37,7 @@ sleep 0.1 for ((i=0;i<=$((${n} - 1));i++)); do port_number=$((${p} + $i)) - screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x' + screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x' sleep 0.1 echo " Splash server launched on port $port_number" done diff --git a/var/www/modules/hiddenServices/templates/hiddenServices.html b/var/www/modules/hiddenServices/templates/hiddenServices.html index bbc66ace..1784aa72 100644 --- a/var/www/modules/hiddenServices/templates/hiddenServices.html +++ b/var/www/modules/hiddenServices/templates/hiddenServices.html @@ -6,6 +6,7 @@ Hidden Service - AIL + diff --git a/var/www/modules/hiddenServices/templates/showDomain.html b/var/www/modules/hiddenServices/templates/showDomain.html index 1d58c4ba..50b3c631 100644 --- a/var/www/modules/hiddenServices/templates/showDomain.html +++ b/var/www/modules/hiddenServices/templates/showDomain.html @@ -6,6 +6,7 @@ Show Domain - AIL + diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index e4fc3cfd..f79239a3 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -186,7 +186,6 @@ def showpaste(content_range, requested_path): crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain') crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father') crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link') - crawler_metadata['external_links'] =r_serv_metadata.scard('paste_onion_external_links:'+requested_path) crawler_metadata['screenshot'] = paste.get_p_rel_path() else: crawler_metadata['get_metadata'] = False diff --git a/var/www/modules/showpaste/templates/show_saved_paste.html b/var/www/modules/showpaste/templates/show_saved_paste.html index 72add804..440c82e2 100644 --- a/var/www/modules/showpaste/templates/show_saved_paste.html +++ b/var/www/modules/showpaste/templates/show_saved_paste.html @@ -424,9 +424,9 @@
-
+
- Graph + Crawled Paste
@@ -443,10 +443,6 @@ - - - -
Source link {{ crawler_metadata['real_link'] }}
External links{{ crawler_metadata['external_links'] }}