fix: [Crawler splash ResponseNeverReceived] add retry

pull/457/merge
Terrtia 2020-04-06 10:52:44 +02:00
parent 00573c9401
commit 672bb02bbf
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
1 changed file with 20 additions and 1 deletion

View File

@@ -172,7 +172,26 @@ class TorSplashCrawler():
# LUA ERROR # # TODO: print/display errors # LUA ERROR # # TODO: print/display errors
elif 'error' in response.data: elif 'error' in response.data:
if(response.data['error'] == 'network99'): if(response.data['error'] == 'network99'):
print('Connection to proxy refused') ## splash restart ##
error_retry = request.meta.get('error_retry', 0)
if error_retry < 3:
error_retry += 1
url= request.meta['current_url']
father = request.meta['father']
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
time.sleep(10)
yield SplashRequest(
url,
self.parse,
errback=self.errback_catcher,
endpoint='execute',
cache_args=['lua_source'],
meta={'father': father, 'current_url': url, 'error_retry' = error_retry},
args=self.build_request_arg(response.cookiejar)
)
else:
print('Connection to proxy refused')
else: else:
print(response.data['error']) print(response.data['error'])