mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			63 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			63 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python3
 | |
| # -*-coding:UTF-8 -*
 | |
| 
 | |
| import os
 | |
| import sys
 | |
| 
 | |
| sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 | |
| import ConfigLoader
 | |
| 
 | |
| config_loader = ConfigLoader.ConfigLoader()
 | |
| # get and sanityze PASTE DIRECTORY
 | |
| PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
 | |
| PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
 | |
| 
 | |
| r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
 | |
| config_loader = None
 | |
| 
 | |
| def exist_item(item_id):
 | |
|     filename = get_item_filepath(item_id)
 | |
|     if os.path.isfile(filename):
 | |
|         return True
 | |
|     else:
 | |
|         return False
 | |
| 
 | |
| def get_item_filepath(item_id):
 | |
|     filename = os.path.join(PASTES_FOLDER, item_id)
 | |
|     return os.path.realpath(filename)
 | |
| 
 | |
| def get_item_date(item_id, add_separator=False):
 | |
|     l_directory = item_id.split('/')
 | |
|     if add_separator:
 | |
|         return '{}/{}/{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
 | |
|     else:
 | |
|         return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
 | |
| 
 | |
| # # TODO: add an option to check the tag
 | |
| def is_crawled(item_id):
 | |
|     return item_id.startswith('crawled')
 | |
| 
 | |
| def get_item_domain(item_id):
 | |
|     return item_id[19:-36]
 | |
| 
 | |
| def add_item_parent_by_parent_id(parent_type, parent_id, item_id):
 | |
|     parent_item_id = get_obj_id_item_id(parent_type, parent_id)
 | |
|     if parent_item_id:
 | |
|         add_item_parent(parent_item_id, item_id)
 | |
| 
 | |
| def add_item_parent(parent_item_id, item_id):
 | |
|     r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', parent_item_id)
 | |
|     r_serv_metadata.sadd('paste_children:{}'.format(parent_item_id), item_id)
 | |
|     return True
 | |
| 
 | |
| def add_map_obj_id_item_id(obj_id, item_id, obj_type):
 | |
|     if obj_type == 'twitter_id':
 | |
|         r_serv_metadata.hset('map:twitter_id:item_id', obj_id, item_id)
 | |
| 
 | |
| def get_obj_id_item_id(parent_type, parent_id):
 | |
|     all_parents_type = ['twitter_id']
 | |
|     if parent_type in all_parents_type:
 | |
|         return r_serv_metadata.hget('map:twitter_id:item_id', parent_id)
 | |
|     else:
 | |
|         return None
 |