mirror of https://github.com/MISP/misp-modules
				
				
				
			
		
			
				
	
	
		
			54 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			54 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| import json
 | |
| import requests
 | |
| from markdownify import markdownify
 | |
| from bs4 import BeautifulSoup
 | |
| 
 | |
# Default error payload; not referenced by the functions visible in this file.
misperrors = {'error': 'Error'}

# Attribute types taken as input and emitted as output (see introspection()).
mispattributes = {
    'input': ['url'],
    'output': ['text'],
}

# Module metadata handed back by version().
moduleinfo = {
    'version': '0.1',
    'author': 'Sami Mokaddem',
    'description': 'Simple HTML fetcher',
    'module-type': ['expansion'],
}
 | |
| 
 | |
| 
 | |
def fetchHTML(url, timeout=30):
    """Download *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without it a stalled
            server would block the caller indefinitely.

    Returns:
        The decoded response body (``Response.text``). The HTTP status code
        is not checked, so error pages are returned like any other body.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, invalid URL, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    return response.text
 | |
| 
 | |
| 
 | |
def stripUselessTags(html):
    """Return *html* with non-content elements removed.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend; every
    ``script``, ``head``, ``header``, ``footer``, ``meta`` and ``link``
    element is decomposed, and the remaining tree is serialised back to a
    string.
    """
    unwanted = ['script', 'head', 'header', 'footer', 'meta', 'link']
    document = BeautifulSoup(html, 'html.parser')
    for element in document.find_all(unwanted):
        element.decompose()
    return str(document)
 | |
| 
 | |
| 
 | |
def convertHTML(html):
    """Convert *html* to Markdown using ``markdownify``.

    Headings are requested in ATX style, and the ``a`` and ``img`` tags are
    passed as the ``strip`` option.
    """
    options = {
        'heading_style': 'ATX',
        'strip': ['a', 'img'],
    }
    return markdownify(html, **options)
 | |
| 
 | |
| 
 | |
def handler(q=False):
    """Fetch the URL named in a JSON query and return the page as Markdown.

    Args:
        q: JSON-encoded request string expected to decode to a mapping with
            a ``url`` key, or ``False`` when no query is supplied.

    Returns:
        ``False`` when no query is given or the request has no (non-empty)
        ``url``. A dict with a single ``error`` key when the page cannot be
        downloaded. Otherwise a dict whose ``results`` list holds one entry
        carrying the module's output types and the Markdown text.
    """
    if q is False:
        return False
    request = json.loads(q)
    url = request.get('url')
    if not url:
        return False

    try:
        html = fetchHTML(url)
    except requests.exceptions.RequestException as err:
        # Report download failures as an error payload rather than letting
        # the exception escape. A fresh dict is returned so the module-level
        # ``misperrors`` template is never mutated.
        return {'error': 'Unable to fetch {}: {}'.format(url, err)}

    markdown = convertHTML(stripUselessTags(html))
    return {'results': [{'types': mispattributes['output'],
                         'values': [str(markdown)]}]}
 | |
| 
 | |
| 
 | |
def introspection():
    """Return ``mispattributes``, the module's input and output attribute types."""
    return mispattributes
 | |
| 
 | |
| 
 | |
def version():
    """Return ``moduleinfo``, the module's metadata dictionary."""
    return moduleinfo
 |