From 4b3101b7b6423237c339e3b4f0676b7504febd5d Mon Sep 17 00:00:00 2001
From: Mokaddem
Date: Fri, 1 Jul 2016 16:59:08 +0200
Subject: [PATCH] Added template tld. Modified URL using Faup and refactored
 WebStats.

---
 bin/LAUNCH.sh                            |   6 +-
 bin/Url.py                               |  45 ++--
 bin/WebStats.py                          |  62 +++---
 files/protocolsfile                      | 103 ++++++++-
 files/tldsfile                           | 263 +++++++++++++++++++++++
 var/www/Flask_server.py                  |   9 +
 var/www/templates/Protocolstrending.html |  10 +-
 var/www/templates/Tldstrending.html      | 196 +++++++++++++++++
 var/www/templates/Wordstrending.html     |   2 +-
 var/www/templates/index.html             |   2 +-
 var/www/templates/search.html            |   2 +-
 11 files changed, 634 insertions(+), 66 deletions(-)
 create mode 100644 files/tldsfile
 create mode 100644 var/www/templates/Tldstrending.html

diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh
index 330a23ce..fc8c9ff1 100755
--- a/bin/LAUNCH.sh
+++ b/bin/LAUNCH.sh
@@ -107,11 +107,11 @@ function launching_scripts {
     sleep 0.1
     screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate.py; read x'
     sleep 0.1
-    #screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
+    screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
     sleep 0.1
-    #screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
+    screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
     sleep 0.1
@@ -139,7 +139,7 @@ function launching_scripts {
     sleep 0.1
     screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x'
     sleep 0.1
-    #screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
+    screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
 }

 #If no params, display the help

diff --git a/bin/Url.py b/bin/Url.py
index d33e7e9f..ba3842af 100755
--- a/bin/Url.py
+++ b/bin/Url.py
@@ -7,6 +7,8 @@ import dns.exception
 from packages import Paste
 from packages import lib_refine
 from pubsublogger import publisher
+from pyfaup.faup import Faup
+import re

 # Country and ASN lookup
 from cymru.ip2asn.dns import DNSClient as ip2asn
@@ -16,6 +18,12 @@ import ipaddress

 from Helper import Process

+def avoidNone(str):
+    if str is None:
+        return ""
+    else:
+        return str
+
 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
@@ -41,6 +49,7 @@ if __name__ == "__main__":
     message = p.get_from_set()
     prec_filename = None
+    faup = Faup()

     url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
@@ -53,33 +62,27 @@ if __name__ == "__main__":
             PST = Paste.Paste(filename)
             client = ip2asn()
             for x in PST.get_regex(url_regex):
-                scheme, credential, subdomain, domain, host, tld, \
-                    port, resource_path, query_string, f1, f2, f3, \
-                    f4 = x
-                domains_list.append(domain)
-#                p.populate_set_out(x, 'Url')
-                temp_x = ()
-                for i in range(0,13):
-                    if x[i] == '':
-                        temp_x += ('None', )
-                    else:
-                        temp_x += (x[i], )
-                temp_scheme, temp_credential, temp_subdomain, temp_domain, temp_host, temp_tld, \
-                    temp_port, temp_resource_path, temp_query_string, temp_f1, temp_f2, temp_f3, \
-                    temp_f4 = temp_x
+                matching_url = re.search(url_regex, PST.get_p_content())
+                url = matching_url.group(0)

-                to_send = '{} {} {} {} {} {} {} {} {} {} {} {} {} {}'.format(temp_scheme, \
-                    temp_subdomain, temp_credential, temp_domain, temp_host, temp_tld, temp_port, temp_resource_path,\
-                    temp_query_string, temp_f1, temp_f2, temp_f3, temp_f4, PST._get_p_date())
-                p.populate_set_out(to_send , 'Url')
-                publisher.debug('{} Published'.format(x))
+                to_send = "{} {}".format(url, PST._get_p_date())
+                p.populate_set_out(to_send, 'Url')
+
+                faup.decode(url)
+                domain = faup.get_domain()
+                subdomain = faup.get_subdomain()
+                f1 = None
+
+                domains_list.append(domain)
+
+                publisher.debug('{} Published'.format(url))

                 if f1 == "onion":
                     print domain

-                hostl = unicode(subdomain+domain)
+                hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
                 try:
-                    socket.setdefaulttimeout(2)
+                    socket.setdefaulttimeout(1)
                     ip = socket.gethostbyname(unicode(hostl))
                 except:
                     # If the resolver is not giving any IPv4 address,
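For readers who have not used pyfaup before, here is a minimal standalone sketch (not part of the patch) of the Faup calls the reworked Url.py relies on: decode() parses a URL once, and get_domain()/get_subdomain() return the parsed fields, or None when a field is absent, which is why the avoidNone() helper is introduced. The example URL and the print call are illustrative only.

```python
# Minimal pyfaup usage sketch mirroring the new Url.py flow (assumes pyfaup is installed).
from pyfaup.faup import Faup

def avoidNone(value):
    # Same idea as the avoidNone() helper in the patch: Faup returns None for missing fields.
    return "" if value is None else value

faup = Faup()
url = "http://sub.example.com/index.php"   # illustrative URL, not taken from the patch
faup.decode(url)

domain = faup.get_domain()        # e.g. "example.com"
subdomain = faup.get_subdomain()  # e.g. "sub"

# Url.py concatenates the two (after avoidNone) before resolving the host.
host = avoidNone(subdomain) + avoidNone(domain)
print(host)
```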
diff --git a/bin/WebStats.py b/bin/WebStats.py
index 15508e52..5da443a8 100755
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@@ -5,14 +5,24 @@
 """

 import time
+import datetime
 import re
 import redis
 import os
+from packages import lib_words
 from pubsublogger import publisher
 from packages import Paste
 from Helper import Process
+from pyfaup.faup import Faup

-
+def analyse(field_name):
+    field = url_parsed[field_name]
+    if field is not None:
+        prev_score = r_serv1.hget(field, date)
+        if prev_score is not None:
+            r_serv1.hset(field, date, int(prev_score) + 1)
+        else:
+            r_serv1.hset(field, date, 1)

 if __name__ == '__main__':
     # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@@ -37,16 +47,22 @@ if __name__ == '__main__':
                            db=p.config.get("Redis_Level_DB", "db"))

     # FILE CURVE SECTION #
-    csv_path = os.path.join(os.environ['AIL_HOME'],
+    csv_path_proto = os.path.join(os.environ['AIL_HOME'],
                             p.config.get("Directories", "protocolstrending_csv"))
     protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                             p.config.get("Directories", "protocolsfile"))
+
+    csv_path_tld = os.path.join(os.environ['AIL_HOME'],
+                            p.config.get("Directories", "tldstrending_csv"))
+    tldsfile_path = os.path.join(os.environ['AIL_HOME'],
+                            p.config.get("Directories", "tldsfile"))
+    faup = Faup()
+
     generate_new_graph = False
     # Endless loop getting messages from the input queue
     while True:
         # Get one message from the input queue
         message = p.get_from_set()
-        generate_new_graph = False
         if message is None:
             if generate_new_graph:
@@ -55,10 +71,15 @@ if __name__ == '__main__':
                 today = datetime.date.today()
                 year = today.year
                 month = today.month
-                lib_words.create_curve_with_word_file(r_serv1, csv_path,
+
+                lib_words.create_curve_with_word_file(r_serv1, csv_path_proto,
                                                       protocolsfile_path, year,
                                                       month)

+                lib_words.create_curve_with_word_file(r_serv1, csv_path_tld,
+                                                      tldsfile_path, year,
+                                                      month)
+
             publisher.debug("{} queue is empty, waiting".format(config_section))
             time.sleep(1)
             continue
@@ -66,30 +87,9 @@ if __name__ == '__main__':
         else:
             generate_new_graph = True
             # Do something with the message from the queue
-            scheme, credential, subdomain, domain, host, tld, \
-                port, resource_path, query_string, f1, f2, f3, \
-                f4 , date= message.split()
-
-            prev_score = r_serv1.hget(scheme, date)
-            if prev_score is not None:
-                r_serv1.hset(scheme, date, int(prev_score) + int(score))
-            else:
-                r_serv1.hset(scheme, date, score)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            url, date = message.split()
+            faup.decode(url)
+            url_parsed = faup.get()
+
+            analyse('scheme') #Scheme analysis
+            analyse('tld') #Tld analysis
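The refactor replaces the old inline per-scheme counting block with the analyse() helper above. The following standalone sketch (not part of the patch) shows the underlying counting scheme, assuming a local Redis instance: each observed value (a scheme such as https, or a TLD such as com) becomes a Redis hash, and each date is a field whose counter is incremented by one. The key names and the date value below are illustrative only.

```python
# Per-day counter layout used by WebStats.py's analyse() (sketch; assumes redis-py and a local server).
import redis

r = redis.StrictRedis(host='localhost', port=6379, db=0)

def count_field(value, date):
    # Hash named after the observed value; one field per day, incremented on every hit.
    prev_score = r.hget(value, date)
    if prev_score is not None:
        r.hset(value, date, int(prev_score) + 1)
    else:
        r.hset(value, date, 1)

count_field('https', '20160701')  # one https URL seen on 2016-07-01
count_field('com', '20160701')    # one .com domain seen the same day
```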
diff --git a/files/protocolsfile b/files/protocolsfile
index f36a40f5..ffece4b6 100644
--- a/files/protocolsfile
+++ b/files/protocolsfile
@@ -1,3 +1,100 @@
-FTP
-HTTP
-HTTPS
+afs
+file
+ftp
+z39.50
+z39.50r
+z39.50s
+vemmi
+urn
+nfs
+dict
+acap
+rtspu
+rtsp
+rtsps
+tip
+pop
+cid
+mid
+data
+thismessage
+service
+shttp
+fax
+modem
+tv
+sip
+sips
+go
+icap
+h323
+ipp
+xmlrpc.beep
+xmlrpc.beeps
+tftp
+mupdate
+pres
+im
+mtqp
+tel
+iris
+iris.beep
+crid
+snmp
+tag
+wais
+prospero
+soap.beep
+soap.beeps
+telnet
+gopher
+cap
+info
+dns
+ldap
+dav
+opaquelocktoken
+msrp
+msrps
+dtn
+imap
+xmpp
+iax
+news
+nntp
+snews
+sms
+rsync
+sieve
+geo
+mailto
+jms
+mailserver
+ipn
+tn3270
+ws
+wss
+xcon
+xcon-userid
+about
+aaa
+aaas
+session
+ni
+nih
+reload
+ham
+stun
+stuns
+turn
+turns
+http
+https
+coap
+coaps
+rtmfp
+ipps
+pkcs11
+acct
+example
+vnc
diff --git a/files/tldsfile b/files/tldsfile
new file mode 100644
index 00000000..73bff3af
--- /dev/null
+++ b/files/tldsfile
@@ -0,0 +1,263 @@
+com
+org
+net
+int
+edu
+gov
+mil
+arpa
+ac
+ad
+ae
+af
+ag
+ai
+al
+am
+an
+ao
+aq
+ar
+as
+at
+au
+aw
+ax
+az
+ba
+bb
+bd
+be
+bf
+bg
+bh
+bi
+bj
+bl
+bm
+bn
+bo
+bq
+br
+bs
+bt
+bv
+bw
+by
+bz
+ca
+cc
+cd
+cf
+cg
+ch
+ci
+ck
+cl
+cm
+cn
+co
+cr
+cu
+cv
+cw
+cx
+cy
+cz
+de
+dj
+dk
+dm
+do
+dz
+ec
+ee
+eg
+eh
+er
+es
+et
+eu
+fi
+fj
+fk
+fm
+fo
+fr
+ga
+gb
+gd
+ge
+gf
+gg
+gh
+gi
+gl
+gm
+gn
+gp
+gq
+gr
+gs
+gt
+gu
+gw
+gy
+hk
+hm
+hn
+hr
+ht
+hu
+id
+ie
+il
+im
+in
+io
+iq
+ir
+is
+it
+je
+jm
+jo
+jp
+ke
+kg
+kh
+ki
+km
+kn
+kp
+kr
+kw
+ky
+kz
+la
+lb
+lc
+li
+lk
+lr
+ls
+lt
+lu
+lv
+ly
+ma
+mc
+md
+me
+mf
+mg
+mh
+mk
+ml
+mm
+mn
+mo
+mp
+mq
+mr
+ms
+mt
+mu
+mv
+mw
+mx
+my
+mz
+na
+nc
+ne
+nf
+ng
+ni
+nl
+no
+np
+nr
+nu
+nz
+om
+pa
+pe
+pf
+pg
+ph
+pk
+pl
+pm
+pn
+pr
+ps
+pt
+pw
+py
+qa
+re
+ro
+rs
+ru
+rw
+sa
+sb
+sc
+sd
+se
+sg
+sh
+si
+sj
+sk
+sl
+sm
+sn
+so
+sr
+ss
+st
+su
+sv
+sx
+sy
+sz
+tc
+td
+tf
+tg
+th
+tj
+tk
+tl
+tm
+tn
+to
+tp
+tr
+tt
+tv
+tw
+tz
+ua
+ug
+uk
+um
+us
+uy
+uz
+va
+vc
+ve
+vg
+vi
+vn
+vu
+wf
+ws
+ye
+yt
+za
+zm
+zw
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index 7a1aa472..9c152cab 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -96,5 +96,14 @@ def wordstrending():
     return render_template("Wordstrending.html")


+@app.route("/protocolstrending/")
+def protocolstrending():
+    return render_template("Protocolstrending.html")
+
+@app.route("/tldstrending/")
+def tldstrending():
+    return render_template("Tldstrending.html")
+
+
 if __name__ == "__main__":
     app.run(host='0.0.0.0', port=7000, threaded=True)
diff --git a/var/www/templates/Protocolstrending.html b/var/www/templates/Protocolstrending.html
index 795ca0ab..6183f2ff 100644
--- a/var/www/templates/Protocolstrending.html
+++ b/var/www/templates/Protocolstrending.html
@@ -24,8 +24,8 @@