mirror of https://github.com/CIRCL/AIL-framework
chg: [crawler] add infos
parent
e4a5f66a10
commit
c49e871ba8
|
@ -36,7 +36,8 @@ def crawl_onion(url, domain, date, date_month, message):
|
|||
r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message)
|
||||
|
||||
print('--------------------------------------')
|
||||
print(' DOCKER SPLASH DOWN')
|
||||
print(' \033[91m DOCKER SPLASH DOWN\033[0m')
|
||||
print(' {} DOWN'.format(splash_url))
|
||||
exit(0)
|
||||
|
||||
if r.status_code == 200:
|
||||
|
@ -94,7 +95,6 @@ if __name__ == '__main__':
|
|||
print('incorrect crawler type: {}'.format(type_hidden_service))
|
||||
exit(0)
|
||||
|
||||
print('crawler type: {}'.format(type_hidden_service))
|
||||
print('splash url: {}'.format(splash_url))
|
||||
|
||||
crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit")
|
||||
|
@ -132,13 +132,11 @@ if __name__ == '__main__':
|
|||
message = r_onion.spop('mess_onion')
|
||||
|
||||
if message is not None:
|
||||
print(message)
|
||||
|
||||
splitted = message.split(';')
|
||||
if len(splitted) == 2:
|
||||
url, paste = splitted
|
||||
paste = paste.replace(PASTES_FOLDER+'/', '')
|
||||
print(paste)
|
||||
'''
|
||||
if not '.onion' in url:
|
||||
print('not onion')
|
||||
|
@ -157,9 +155,9 @@ if __name__ == '__main__':
|
|||
|
||||
domain_url = 'http://{}'.format(domain)
|
||||
|
||||
print('------------------START CRAWLER------------------')
|
||||
print(type_hidden_service)
|
||||
print('-------------------------------------------------')
|
||||
print('\033[92m------------------START CRAWLER------------------\033[0m')
|
||||
print('crawler type: {}'.format(type_hidden_service))
|
||||
print('\033[92m-------------------------------------------------\033[0m')
|
||||
print('url: {}'.format(url))
|
||||
print('domain: {}'.format(domain))
|
||||
print('domain_url: {}'.format(domain_url))
|
||||
|
|
|
@ -1,6 +1,14 @@
|
|||
#!/bin/bash
|
||||
|
||||
#usage() { echo "Usage: sudo $0 [-f <config absolute_path>] [-p <port_start>] [-n <number_of_splash>]" 1>&2; exit 1; }
|
||||
usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n <number_of_splash_servers>]" 1>&2;
|
||||
echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)"
|
||||
echo " -p: number of the first splash server port number. This number is incremented for the others splash server"
|
||||
echo " -n: number of splash servers to start"
|
||||
echo ""
|
||||
echo "example:"
|
||||
echo "sudo ./launch_splash_crawler.sh -f /home/my_user/AIL-framework/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 3"
|
||||
exit 1;
|
||||
}
|
||||
|
||||
while getopts ":p:f:n:" o; do
|
||||
case "${o}" in
|
||||
|
@ -25,14 +33,11 @@ if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
|
|||
echo "usage"
|
||||
fi
|
||||
|
||||
first_port=$p
|
||||
echo "usage0"
|
||||
screen -dmS "Docker_Splash"
|
||||
echo "usage1"
|
||||
sleep 0.1
|
||||
|
||||
for ((i=0;i<=$((${n} - 1));i++)); do
|
||||
port_number=$((${p} + $i))
|
||||
screen -S "Docker_Splash" -X screen -t "docker_splash:$i" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x'
|
||||
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x'
|
||||
sleep 0.1
|
||||
done
|
||||
|
|
|
@ -1,7 +1,12 @@
|
|||
#!/bin/bash
|
||||
|
||||
# install docker
|
||||
sudo apt install docker.io
|
||||
read -p "Do you want to install docker? (use local splash server) [y/n] " -n 1 -r
|
||||
echo # (optional) move to a new line
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]
|
||||
then
|
||||
# install docker
|
||||
sudo apt install docker.io
|
||||
fi
|
||||
|
||||
# pull splash docker image
|
||||
sudo docker pull scrapinghub/splash
|
||||
|
|
Loading…
Reference in New Issue