mirror of https://github.com/CIRCL/AIL-framework
Merge branch 'master' of https://github.com/CIRCL/AIL-framework
commit
4b0a7210b8
|
@ -12,6 +12,7 @@ import time
|
||||||
import subprocess
|
import subprocess
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from collections import deque
|
||||||
from pyfaup.faup import Faup
|
from pyfaup.faup import Faup
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
@ -303,7 +304,7 @@ if __name__ == '__main__':
|
||||||
#mode = sys.argv[1]
|
#mode = sys.argv[1]
|
||||||
splash_port = sys.argv[1]
|
splash_port = sys.argv[1]
|
||||||
|
|
||||||
rotation_mode = ['onion', 'regular']
|
rotation_mode = deque(['onion', 'regular'])
|
||||||
default_proto_map = {'http': 80, 'https': 443}
|
default_proto_map = {'http': 80, 'https': 443}
|
||||||
######################################################## add ftp ???
|
######################################################## add ftp ???
|
||||||
|
|
||||||
|
@ -361,6 +362,7 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
update_auto_crawler()
|
update_auto_crawler()
|
||||||
|
|
||||||
|
rotation_mode.rotate()
|
||||||
to_crawl = get_elem_to_crawl(rotation_mode)
|
to_crawl = get_elem_to_crawl(rotation_mode)
|
||||||
if to_crawl:
|
if to_crawl:
|
||||||
url_data = unpack_url(to_crawl['url'])
|
url_data = unpack_url(to_crawl['url'])
|
||||||
|
|
|
@ -125,7 +125,15 @@ class Paste(object):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
paste = self.cache.get(self.p_path)
|
paste = self.cache.get(self.p_path)
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
paste = None
|
||||||
|
except Exception as e:
|
||||||
|
print("ERROR in: " + self.p_path)
|
||||||
|
print(e)
|
||||||
|
paste = None
|
||||||
|
|
||||||
if paste is None:
|
if paste is None:
|
||||||
try:
|
try:
|
||||||
with gzip.open(self.p_path, 'r') as f:
|
with gzip.open(self.p_path, 'r') as f:
|
||||||
|
|
Loading…
Reference in New Issue