mirror of https://github.com/CIRCL/AIL-framework
fix: [Install] add python requirements
parent
c12df31533
commit
a94a0fe1a7
|
@ -1,70 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
|
||||
from packages import Paste
|
||||
from Helper import Process
|
||||
from pubsublogger import publisher
|
||||
|
||||
import time
|
||||
import redis
|
||||
import newspaper
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
from newspaper import fulltext
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
publisher.port = 6380
|
||||
publisher.channel = "Script"
|
||||
|
||||
publisher.info("Script DomainSubject started")
|
||||
|
||||
config_section = 'DomainSubject'
|
||||
p = Process(config_section)
|
||||
|
||||
r_onion = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Onion", "host"),
|
||||
port=p.config.getint("ARDB_Onion", "port"),
|
||||
db=p.config.getint("ARDB_Onion", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
|
||||
while True:
|
||||
|
||||
# format: <domain>
|
||||
domain = p.get_from_set()
|
||||
domain = 'easycoinsayj7p5l.onion'
|
||||
|
||||
if domain is not None:
|
||||
|
||||
#retrieve all crawled pastes
|
||||
set_crawles_pastes = r_onion.smembers('temp:crawled_domain_pastes:{}'.format(domain))
|
||||
if set_crawles_pastes:
|
||||
dict_keyword = defaultdict(int)
|
||||
|
||||
for paste_path in set_crawles_pastes:
|
||||
|
||||
paste = Paste.Paste(paste_path)
|
||||
content = paste.get_p_content()
|
||||
|
||||
article = newspaper.Article(url='')
|
||||
article.set_html(content)
|
||||
article.parse()
|
||||
article.nlp()
|
||||
|
||||
for keyword in article.keywords:
|
||||
dict_keyword[keyword] += 1
|
||||
|
||||
|
||||
if dict_keyword:
|
||||
res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
|
||||
for item in res:
|
||||
print(item)
|
||||
else:
|
||||
print('no keywords found')
|
||||
time.sleep(60)
|
||||
|
||||
else:
|
||||
time.sleep(5)
|
|
@ -70,3 +70,6 @@ https://github.com/saffsd/langid.py/archive/master.zip
|
|||
|
||||
#LibInjection bindings
|
||||
pylibinjection
|
||||
|
||||
# Graph
|
||||
matplotlib
|
||||
|
|
Loading…
Reference in New Issue