chg: [Crawler] add domains blacklist

pull/260/head
Terrtia 2018-09-28 16:29:09 +02:00
parent b3a6dc8487
commit 6328cc22b7
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
4 changed files with 17 additions and 1 deletions

View File

@@ -130,6 +130,16 @@ if __name__ == '__main__':
db=p.config.getint("ARDB_Onion", "db"), db=p.config.getint("ARDB_Onion", "db"),
decode_responses=True) decode_responses=True)
# load domains blacklist
try:
with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_onion.txt', 'r') as f:
r_onion.delete('blacklist_{}'.format(type_hidden_service))
lines = f.read().splitlines()
for line in lines:
r_onion.sadd('blacklist_{}'.format(type_hidden_service), line)
except Exception:
pass
while True: while True:
# Recovering the streamed message informations. # Recovering the streamed message informations.
@@ -160,7 +170,7 @@ if __name__ == '__main__':
print('domain: {}'.format(domain)) print('domain: {}'.format(domain))
print('domain_url: {}'.format(domain_url)) print('domain_url: {}'.format(domain_url))
if not r_onion.sismember('banned_{}'.format(type_hidden_service), domain): if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain):
date = datetime.datetime.now().strftime("%Y%m%d") date = datetime.datetime.now().strftime("%Y%m%d")
date_month = datetime.datetime.now().strftime("%Y%m") date_month = datetime.datetime.now().strftime("%Y%m")

View File

@@ -0,0 +1 @@
www.facebookcorewwwi.onion

View File

@@ -75,6 +75,7 @@ def hiddenServices_page():
statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date)) statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date)) statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down'] statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
for onion in last_onions: for onion in last_onions:
metadata_onion = {} metadata_onion = {}

View File

@@ -120,6 +120,10 @@
<tr> <tr>
<td>Crawled Domains</td> <td>Crawled Domains</td>
<td>{{ statDomains['total'] }}</td> <td>{{ statDomains['total'] }}</td>
</tr>
<tr>
<td>Domains in Queue</td>
<td>{{ statDomains['domains_queue'] }}</td>
</tr> </tr>
</tbody> </tbody>
</table> </table>