Fix Python 3 type issues

pull/201/head
Terrtia 2018-04-16 17:00:44 +02:00
parent 9e07a0ebdb
commit 15ef02fe53
5 changed files with 26 additions and 15 deletions

View File

@@ -41,7 +41,7 @@ def fetch(p, r_cache, urls, domains, path):
for url, domain in zip(urls, domains):
if r_cache.exists(url) or url in failed:
continue
to_fetch = base64.standard_b64encode(url)
to_fetch = base64.standard_b64encode(url.encode('utf8'))
process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch],
stdout=subprocess.PIPE)
while process.poll() is None:

View File

@@ -66,10 +66,10 @@ def analyse(url, path):
result_query = 0
if resource_path is not None:
result_path = is_sql_injection(resource_path)
result_path = is_sql_injection(resource_path.decode('utf8'))
if query_string is not None:
result_query = is_sql_injection(query_string)
result_query = is_sql_injection(query_string.decode('utf8'))
if (result_path > 0) or (result_query > 0):
paste = Paste.Paste(path)
@@ -93,7 +93,7 @@ def analyse(url, path):
# defined above on it.
def is_sql_injection(url_parsed):
line = urllib.request.unquote(url_parsed)
line = string.upper(line)
line = str.upper(line)
result = []
result_suspect = []
@@ -104,12 +104,12 @@ def is_sql_injection(url_parsed):
for word_list in word_injection:
for word in word_list:
temp_res = string.find(line, string.upper(word))
temp_res = str.find(line, str.upper(word))
if temp_res!=-1:
result.append(line[temp_res:temp_res+len(word)])
for word in word_injection_suspect:
temp_res = string.find(line, string.upper(word))
temp_res = str.find(line, str.upper(word))
if temp_res!=-1:
result_suspect.append(line[temp_res:temp_res+len(word)])

View File

@@ -95,17 +95,23 @@ if __name__ == "__main__":
subdomain = faup.get_subdomain()
f1 = None
domains_list.append(domain)
publisher.debug('{} Published'.format(url))
if f1 == "onion":
print(domain)
hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
if subdomain is not None:
subdomain = subdomain.decode('utf8')
if domain is not None:
domain = domain.decode('utf8')
domains_list.append(domain)
hostl = avoidNone(subdomain) + avoidNone(domain)
try:
socket.setdefaulttimeout(1)
ip = socket.gethostbyname(unicode(hostl))
ip = socket.gethostbyname(hostl)
except:
# If the resolver is not giving any IPv4 address,
# ASN/CC lookup is skip.
@@ -113,10 +119,12 @@ if __name__ == "__main__":
try:
l = client.lookup(ip, qType='IP')
except ipaddress.AddressValueError:
continue
cc = getattr(l, 'cc')
asn = getattr(l, 'asn')
if getattr(l, 'asn') is not None:
asn = getattr(l, 'asn')[2:] #remobe b'
# EU is not an official ISO 3166 code (but used by RIPE
# IP allocation)
@@ -134,11 +142,13 @@ if __name__ == "__main__":
A_values = lib_refine.checking_A_record(r_serv2,
domains_list)
if A_values[0] >= 1:
PST.__setattr__(channel, A_values)
PST.save_attribute_redis(channel, (A_values[0],
list(A_values[1])))
pprint.pprint(A_values)
publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path))

View File

@@ -29,6 +29,7 @@ num_day_to_look = 5 # the detection of the progression start num_day_to_lo
def analyse(server, field_name, date, url_parsed):
field = url_parsed[field_name]
if field is not None:
field = field.decode('utf8')
server.hincrby(field, date, 1)
if field_name == "domain": #save domain in a set for the monthly plot
domain_set_name = "domain_set_" + date[0:6]
@@ -179,7 +180,7 @@ if __name__ == '__main__':
# Tld analysis
analyse(r_serv_trend, 'tld', date, url_parsed)
# Domain analysis
analyse(r_serv_trend, 'domain', date, url_parsed)
analyse(r_serv_trend, 'domain', date, url_parsed)
compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)

View File

@@ -3,7 +3,7 @@
import socks
import socket
import urllib2
import urllib.request
import StringIO
import gzip
import base64
@@ -21,10 +21,10 @@ def create_connection(address, timeout=None, source_address=None):
def get_page(url, torclient_host='127.0.0.1', torclient_port=9050):
request = urllib2.Request(url)
request = urllib.request.Request(url)
# UA of the Tor browser bundle
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0')
return urllib2.urlopen(request, timeout=5).read(max_size * 100000)
return urllib.request.urlopen(request, timeout=5).read(max_size * 100000)
def makegzip64(s):