2016-07-25 18:48:08 +02:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import collections
import json
import os
import re
import sys
from json import JSONEncoder

try:
    # The ABC aliases were removed from ``collections`` in Python 3.10.
    from collections.abc import Mapping
except ImportError:
    # Python 2 has no ``collections.abc``.
    from collections import Mapping

try:
    import requests
    HAS_REQUESTS = True
except ImportError:
    HAS_REQUESTS = False

try:
    import jsonschema
    HAS_JSONSCHEMA = True
except ImportError:
    HAS_JSONSCHEMA = False
2016-07-25 18:48:08 +02:00
2016-10-13 17:23:10 +02:00
class EncodeTaxonomies(JSONEncoder):
    """JSON encoder that knows how to serialise the taxonomy objects."""

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Taxonomy, Predicate and Entry instances are replaced by the result
        of their ``to_dict()`` method; anything else is handed to the stock
        JSONEncoder, which raises TypeError for unsupported types.
        """
        if not isinstance(obj, (Taxonomy, Predicate, Entry)):
            return JSONEncoder.default(self, obj)
        return obj.to_dict()
2016-10-13 17:23:10 +02:00
2016-07-25 18:48:08 +02:00
class Entry:
    """One entry (value) attached to a predicate of a taxonomy."""

    def __init__(self, entry):
        """Read the entry attributes from the dict *entry*.

        'value' is mandatory (KeyError if absent); 'expanded', 'colour',
        'description' and 'numerical_value' are optional and default to None.
        """
        self.value = entry['value']
        self.expanded = entry.get('expanded')
        self.colour = entry.get('colour')
        self.description = entry.get('description')
        self.numerical_value = entry.get('numerical_value')

    def to_dict(self):
        """Return the entry as a dict, leaving out the unset attributes."""
        as_dict = {'value': self.value}
        # These three are skipped whenever they are falsy (None, '', ...).
        for key in ('expanded', 'colour', 'description'):
            content = getattr(self, key)
            if content:
                as_dict[key] = content
        # 0 is a meaningful numerical value, so only None is skipped here.
        if self.numerical_value is not None:
            as_dict['numerical_value'] = self.numerical_value
        return as_dict

    def to_json(self):
        """Return the entry serialised as a JSON string."""
        return json.dumps(self, cls=EncodeTaxonomies)

    def __str__(self):
        return self.value
2016-07-25 18:48:08 +02:00
class Predicate(Mapping):
    """A predicate of a taxonomy together with its entries.

    Behaves as a read-only mapping from entry value to Entry.
    """

    def __init__(self, predicate, entries):
        """Build the predicate.

        predicate -- dict describing the predicate; 'value' is mandatory
                     (KeyError if absent), 'expanded', 'description',
                     'colour' and 'exclusive' are optional.
        entries   -- iterable of entry dicts (each with a 'value' key), or
                     None / empty when the predicate has no entries.
        """
        self.predicate = predicate['value']
        self.expanded = predicate.get('expanded')
        self.description = predicate.get('description')
        self.colour = predicate.get('colour')
        self.exclusive = predicate.get('exclusive')
        self.__init_entries(entries)

    def __init_entries(self, entries):
        """Index the given entry dicts by their 'value'."""
        self.entries = {}
        if entries:
            for e in entries:
                self.entries[e['value']] = Entry(e)

    def to_dict(self):
        """Return the predicate as a dict, leaving out the unset attributes.

        When the predicate has entries, they are returned as a list of
        Entry objects under the 'entries' key.
        """
        to_return = {'value': self.predicate}
        if self.expanded:
            to_return['expanded'] = self.expanded
        if self.description:
            to_return['description'] = self.description
        if self.colour:
            to_return['colour'] = self.colour
        if self.exclusive:
            to_return['exclusive'] = self.exclusive
        if self.entries:
            # A real list, not a mapping view: the JSON encoder cannot
            # serialise a ValuesView, which made to_json() fail on Python 3.
            to_return['entries'] = list(self.values())
        return to_return

    def to_json(self):
        """Return the predicate serialised as a JSON string."""
        return json.dumps(self, cls=EncodeTaxonomies)

    def __str__(self):
        return self.predicate

    def __getitem__(self, entry):
        return self.entries[entry]

    def __iter__(self):
        return iter(self.entries)

    def __len__(self):
        return len(self.entries)
2016-07-25 18:48:08 +02:00
class Taxonomy(Mapping):
    """A single taxonomy: a namespace with its predicates and entries.

    Behaves as a read-only mapping from predicate value to Predicate.
    """

    def __init__(self, taxonomy):
        """Build the taxonomy from its decoded JSON description.

        taxonomy -- dict with the mandatory keys 'namespace', 'description',
                    'version' and 'predicates', and the optional keys
                    'expanded', 'refs', 'type', 'exclusive' and 'values'.

        Raises KeyError when a mandatory key is missing.
        """
        self.taxonomy = taxonomy
        self.name = self.taxonomy['namespace']
        self.description = self.taxonomy['description']
        self.version = self.taxonomy['version']
        self.expanded = self.taxonomy.get('expanded')
        self.refs = self.taxonomy.get('refs')
        self.type = self.taxonomy.get('type')
        self.exclusive = self.taxonomy.get('exclusive')
        self.__init_predicates()

    def __init_predicates(self):
        """Create one Predicate per item of 'predicates'.

        The entries listed under 'values' are first grouped by predicate so
        that each Predicate receives all of its entries at once.
        """
        self.predicates = {}
        entries = {}
        for v in self.taxonomy.get('values') or []:
            entries.setdefault(v['predicate'], []).extend(v['entry'])
        for p in self.taxonomy['predicates']:
            self.predicates[p['value']] = Predicate(p, entries.get(p['value']))

    def to_json(self):
        """Return the taxonomy serialised as a JSON string."""
        return json.dumps(self, cls=EncodeTaxonomies)

    def to_dict(self):
        """Return the taxonomy as a dict, leaving out the unset attributes.

        Entries are moved out of the individual predicates into a top-level
        'values' list of {'predicate': ..., 'entry': [...]} dicts; 'values'
        is omitted when no predicate has entries.
        """
        to_return = {'namespace': self.name, 'description': self.description,
                     'version': self.version}
        if self.expanded:
            to_return['expanded'] = self.expanded
        if self.refs:
            to_return['refs'] = self.refs
        if self.type:
            to_return['type'] = self.type
        if self.exclusive:
            to_return['exclusive'] = self.exclusive
        predicates = [p.to_dict() for p in self.values()]
        entries = []
        for p in predicates:
            if p.get('entries') is None:
                continue
            # pop() strips the entries from the predicate dict so they only
            # appear once, under 'values'.
            entries.append({'predicate': p['value'],
                            'entry': [e.to_dict() for e in p.pop('entries')]})
        to_return['predicates'] = predicates
        if entries:
            to_return['values'] = entries
        return to_return

    def has_entries(self):
        """Return True when at least one predicate has entries."""
        return any(p.entries for p in self.values())

    def __str__(self):
        return '\n'.join(self.machinetags())

    def make_machinetag(self, predicate, entry=None):
        """Format a machinetag of this taxonomy.

        Returns 'namespace:predicate="entry"' when *entry* is truthy and
        'namespace:predicate' otherwise.
        """
        if entry:
            return '{}:{}="{}"'.format(self.name, predicate, entry)
        else:
            return '{}:{}'.format(self.name, predicate)

    def machinetags(self):
        """Return the list of all machinetags of this taxonomy.

        A predicate with entries yields one tag per entry; a predicate
        without entries yields a single 'namespace:predicate' tag.
        """
        to_return = []
        for p, content in self.items():
            if content:
                for k in content.keys():
                    to_return.append('{}:{}="{}"'.format(self.name, p, k))
            else:
                to_return.append('{}:{}'.format(self.name, p))
        return to_return

    def __getitem__(self, predicate):
        return self.predicates[predicate]

    def __iter__(self):
        return iter(self.predicates)

    def __len__(self):
        return len(self.predicates)

    def amount_entries(self):
        """Return the total number of entries over all predicates.

        When no predicate has any entry, the number of predicates is
        returned instead.
        """
        if self.has_entries():
            return sum(len(e) for e in self.values())
        else:
            return len(self.keys())

    def machinetags_expanded(self):
        """Return the machinetags using the expanded text of each entry.

        Same layout as machinetags(), except that the quoted part is the
        entry's 'expanded' attribute instead of its value.
        """
        to_return = []
        for p, content in self.items():
            if content:
                for k, entry in content.items():
                    to_return.append('{}:{}="{}"'.format(self.name, p, entry.expanded))
            else:
                to_return.append('{}:{}'.format(self.name, p))
        return to_return
class Taxonomies(Mapping):
    """The set of taxonomies listed in a MANIFEST.json file.

    Behaves as a read-only mapping from taxonomy name to Taxonomy.
    """

    def __init__(self, manifest_url='https://raw.githubusercontent.com/MISP/misp-taxonomies/master/MANIFEST.json',
                 manifest_path=os.path.join(os.path.abspath(os.path.dirname(sys.modules['pytaxonomies'].__file__)),
                                            'data', 'misp-taxonomies', 'MANIFEST.json')):
        """Load the manifest and every taxonomy it lists.

        manifest_url  -- URL of the manifest, used only when *manifest_path*
                         is falsy (requires the 'requests' module).
        manifest_path -- path of a local manifest; the taxonomies are then
                         read from the directory containing it.

        Raises Exception when a taxonomy's namespace differs from the name
        the manifest gives it.
        """
        if manifest_path:
            self.loader = self.__load_path
            self.manifest = self.loader(manifest_path)
            # Taxonomies live next to the manifest on disk.
            self.url = os.path.dirname(os.path.realpath(manifest_path))
        else:
            self.loader = self.__load_url
            self.manifest = self.loader(manifest_url)
            self.url = self.manifest['url']
        self.version = self.manifest['version']
        self.license = self.manifest['license']
        self.description = self.manifest['description']
        self.__init_taxonomies()

    def validate_with_schema(self):
        """Validate every loaded taxonomy against the bundled schema.json.

        Raises ImportError when the 'jsonschema' module is not installed;
        validation failures are raised by jsonschema.validate.
        """
        if not HAS_JSONSCHEMA:
            raise ImportError('jsonschema is required: pip install jsonschema')
        schema = os.path.join(os.path.abspath(os.path.dirname(sys.modules['pytaxonomies'].__file__)),
                              'data', 'misp-taxonomies', 'schema.json')
        with open(schema, 'r') as f:
            loaded_schema = json.load(f)
        for t in self.values():
            jsonschema.validate(t.taxonomy, loaded_schema)

    def __load_path(self, path):
        """Return the decoded content of the JSON file at *path*."""
        with open(path, 'r') as f:
            return json.load(f)

    def __load_url(self, url):
        """Fetch *url* with requests and return the decoded JSON body."""
        if not HAS_REQUESTS:
            raise Exception("Python module 'requests' isn't installed, unable to fetch the taxonomies.")
        return requests.get(url).json()

    def __make_uri(self, taxonomy_name):
        """Return the location (path or URL) of the named taxonomy file."""
        return '{}/{}/{}'.format(self.url, taxonomy_name, self.manifest['path'])

    def __init_taxonomies(self):
        """Load every taxonomy listed in the manifest."""
        self.taxonomies = {}
        for t in self.manifest['taxonomies']:
            name = t['name']
            taxonomy = Taxonomy(self.loader(self.__make_uri(name)))
            self.taxonomies[name] = taxonomy
            if name != taxonomy.name:
                raise Exception("The name of the taxonomy in the manifest ({}) doesn't match with the name in the taxonomy ({})".format(name, taxonomy.name))

    def __getitem__(self, name):
        return self.taxonomies[name]

    def __iter__(self):
        return iter(self.taxonomies)

    def __len__(self):
        return len(self.taxonomies)

    def __str__(self):
        # Each taxonomy is followed by a blank line.
        return ''.join("{}\n\n".format(str(taxonomy)) for taxonomy in self.values())

    def search(self, query, expanded=False):
        """Return the machinetags having a component matching *query*.

        A machinetag matches when one of its components (the pieces between
        ':', '=' and '"') starts or ends with *query*, case-insensitively.
        Each matching machinetag is reported once.

        expanded -- search (and return) the expanded machinetags instead.
        """
        query = query.lower()
        to_return = []
        for taxonomy in self.values():
            if expanded:
                machinetags = taxonomy.machinetags_expanded()
            else:
                machinetags = taxonomy.machinetags()
            for mt in machinetags:
                entries = [e.lower() for e in re.findall(r'[^:="]*', mt) if e]
                # any() stops at the first hit so a tag whose namespace and
                # predicate both match is not appended twice.
                if any(e.startswith(query) or e.endswith(query) for e in entries):
                    to_return.append(mt)
        return to_return

    def revert_machinetag(self, machinetag):
        """Resolve a machinetag string back to the objects it names.

        Returns (Taxonomy, Predicate, Entry) for 'ns:predicate="entry"' and
        (Taxonomy, Predicate) for 'ns:predicate'.

        Raises IndexError when the string does not have one of these two
        forms and KeyError when a named component is unknown.
        """
        if '=' in machinetag:
            name, predicat, entry = re.findall(r'^([^:]*):([^=]*)="([^"]*)"$', machinetag)[0]
        else:
            name, predicat = re.findall(r'^([^:]*):([^=]*)$', machinetag)[0]
            entry = None
        if entry:
            return self.taxonomies[name], self.taxonomies[name][predicat], self.taxonomies[name][predicat][entry]
        else:
            return self.taxonomies[name], self.taxonomies[name][predicat]

    def all_machinetags(self, expanded=False):
        """Return one list of machinetags per taxonomy.

        expanded -- return the expanded machinetags instead.
        """
        if expanded:
            return [taxonomy.machinetags_expanded() for taxonomy in self.values()]
        return [taxonomy.machinetags() for taxonomy in self.values()]