mirror of https://github.com/CIRCL/AIL-framework
chg: [Crawler] add priority queue, fix #263
parent
c1b34bd99c
commit
88eaaeae93
|
@ -20,7 +20,7 @@ def on_error_send_message_back_in_queue(type_hidden_service, domain, message):
|
|||
# send this msg back in the queue
|
||||
if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain):
|
||||
r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain)
|
||||
r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message)
|
||||
r_onion.sadd('{}_crawler_priority_queue'.format(type_hidden_service), message)
|
||||
|
||||
def crawl_onion(url, domain, date, date_month, message):
|
||||
|
||||
|
@ -166,6 +166,10 @@ if __name__ == '__main__':
|
|||
|
||||
while True:
|
||||
|
||||
# Priority Queue - Recovering the streamed message information.
|
||||
message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
|
||||
|
||||
if message is None:
|
||||
# Recovering the streamed message information.
|
||||
message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
|
||||
|
||||
|
|
|
@ -223,6 +223,10 @@ if __name__ == "__main__":
|
|||
print('send to onion crawler')
|
||||
r_onion.sadd('onion_domain_crawler_queue', domain)
|
||||
msg = '{};{}'.format(url,PST.p_path)
|
||||
if not r_onion.hexists('onion_metadata:{}'.format(domain), 'first_seen'):
|
||||
r_onion.sadd('onion_crawler_priority_queue', msg)
|
||||
print('send to priority queue')
|
||||
else:
|
||||
r_onion.sadd('onion_crawler_queue', msg)
|
||||
#p.populate_set_out(msg, 'Crawler')
|
||||
|
||||
|
|
Loading…
Reference in New Issue