diff --git a/.travis.yml b/.travis.yml index 3d9a67b..18c02c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,8 +14,6 @@ install: - sudo apt-get install libzbar0 libzbar-dev libpoppler-cpp-dev - pip install pipenv - pipenv install --dev - # MKDOCS - - pip install -r docs/REQUIREMENTS.txt script: - pipenv run coverage run -m --parallel-mode --source=misp_modules misp_modules.__init__ -l 127.0.0.1 & @@ -35,14 +33,3 @@ script: after_success: - pipenv run coverage combine .coverage* - pipenv run codecov - # MKDOCS - - make ci_generate_docs - -deploy: - provider: pages - local-dir: site - skip-cleanup: true - github-token: $GITHUB_TOKEN # Set in the settings page of your repository, as a secure variable - keep-history: true - on: - branch: master \ No newline at end of file diff --git a/README.md b/README.md index 4e21c94..462e4c1 100644 --- a/README.md +++ b/README.md @@ -3,24 +3,552 @@ [![Build Status](https://travis-ci.org/MISP/misp-modules.svg?branch=master)](https://travis-ci.org/MISP/misp-modules) [![Coverage Status](https://coveralls.io/repos/github/MISP/misp-modules/badge.svg?branch=master)](https://coveralls.io/github/MISP/misp-modules?branch=master) [![codecov](https://codecov.io/gh/MISP/misp-modules/branch/master/graph/badge.svg)](https://codecov.io/gh/MISP/misp-modules) -[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2F8ear%2Fmisp-modules.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2F8ear%2Fmisp-modules?ref=badge_shield) - -## About MISP modules are autonomous modules that can be used for expansion and other services in [MISP](https://github.com/MISP/MISP). The modules are written in Python 3 following a simple API interface. The objective is to ease the extensions of MISP functionalities without modifying core components. The API is available via a simple REST API which is independent from MISP installation or configuration. -MISP modules support is included in MISP starting from version `2.4.28`. 
+MISP modules support is included in MISP starting from version 2.4.28. -For more information: [Extending MISP with Python modules](https://www.circl.lu/assets/files/misp-training/switch2016/2-misp-modules.pdf) slides from MISP training. +For more information: [Extending MISP with Python modules](https://www.misp-project.org/misp-training/3.1-misp-modules.pdf) slides from MISP training. +## Existing MISP modules + +### Expansion modules + +* [Backscatter.io](misp_modules/modules/expansion/backscatter_io.py) - a hover and expansion module to expand an IP address with mass-scanning observations. +* [BGP Ranking](misp_modules/modules/expansion/bgpranking.py) - a hover and expansion module to expand an AS number with the ASN description, its history, and position in BGP Ranking. +* [BTC scam check](misp_modules/modules/expansion/btc_scam_check.py) - An expansion hover module to instantly check if a BTC address has been abused. +* [BTC transactions](misp_modules/modules/expansion/btc_steroids.py) - An expansion hover module to get a blockchain balance and the transactions from a BTC address in MISP. +* [CIRCL Passive DNS](misp_modules/modules/expansion/circl_passivedns.py) - a hover and expansion module to expand hostname and IP addresses with passive DNS information. +* [CIRCL Passive SSL](misp_modules/modules/expansion/circl_passivessl.py) - a hover and expansion module to expand IP addresses with the X.509 certificate seen. +* [countrycode](misp_modules/modules/expansion/countrycode.py) - a hover module to tell you what country a URL belongs to. +* [CrowdStrike Falcon](misp_modules/modules/expansion/crowdstrike_falcon.py) - an expansion module to expand using CrowdStrike Falcon Intel Indicator API. +* [CVE](misp_modules/modules/expansion/cve.py) - a hover module to give more information about a vulnerability (CVE). 
+* [CVE advanced](misp_modules/modules/expansion/cve_advanced.py) - An expansion module to query the CIRCL CVE search API for more information about a vulnerability (CVE). +* [Cuckoo submit](misp_modules/modules/expansion/cuckoo_submit.py) - A hover module to submit malware sample, url, attachment, domain to Cuckoo Sandbox. +* [DBL Spamhaus](misp_modules/modules/expansion/dbl_spamhaus.py) - a hover module to check Spamhaus DBL for a domain name. +* [DNS](misp_modules/modules/expansion/dns.py) - a simple module to resolve MISP attributes like hostname and domain to expand IP address attributes. +* [docx-enrich](misp_modules/modules/expansion/docx-enrich.py) - an enrichment module to get text out of Word document into MISP (using free-text parser). +* [DomainTools](misp_modules/modules/expansion/domaintools.py) - a hover and expansion module to get information from [DomainTools](http://www.domaintools.com/) whois. +* [EUPI](misp_modules/modules/expansion/eupi.py) - a hover and expansion module to get information about a URL from the [Phishing Initiative project](https://phishing-initiative.eu/?lang=en). +* [Farsight DNSDB Passive DNS](misp_modules/modules/expansion/farsight_passivedns.py) - a hover and expansion module to expand hostname and IP addresses with passive DNS information. +* [GeoIP](misp_modules/modules/expansion/geoip_country.py) - a hover and expansion module to get GeoIP information from geolite/maxmind. +* [Greynoise](misp_modules/modules/expansion/greynoise.py) - a hover module to get information from greynoise. +* [hashdd](misp_modules/modules/expansion/hashdd.py) - a hover module to check file hashes against [hashdd.com](http://www.hashdd.com) including NSRL dataset. +* [hibp](misp_modules/modules/expansion/hibp.py) - a hover module to lookup against Have I Been Pwned? +* [intel471](misp_modules/modules/expansion/intel471.py) - an expansion module to get info from [Intel471](https://intel471.com).
+* [IPASN](misp_modules/modules/expansion/ipasn.py) - a hover and expansion module to get the BGP ASN of an IP address. +* [iprep](misp_modules/modules/expansion/iprep.py) - an expansion module to get IP reputation from packetmail.net. +* [Joe Sandbox submit](misp_modules/modules/expansion/joesandbox_submit.py) - Submit files and URLs to Joe Sandbox. +* [Joe Sandbox query](misp_modules/modules/expansion/joesandbox_query.py) - Query Joe Sandbox with the link of an analysis and get the parsed data. +* [macaddress.io](misp_modules/modules/expansion/macaddress_io.py) - a hover module to retrieve vendor details and other information regarding a given MAC address or an OUI from [MAC address Vendor Lookup](https://macaddress.io). See [integration tutorial here](https://macaddress.io/integrations/MISP-module). +* [macvendors](misp_modules/modules/expansion/macvendors.py) - a hover module to retrieve mac vendor information. +* [ocr-enrich](misp_modules/modules/expansion/ocr-enrich.py) - an enrichment module to get OCRized data from images into MISP. +* [ods-enrich](misp_modules/modules/expansion/ods-enrich.py) - an enrichment module to get text out of OpenOffice spreadsheet document into MISP (using free-text parser). +* [odt-enrich](misp_modules/modules/expansion/odt-enrich.py) - an enrichment module to get text out of OpenOffice document into MISP (using free-text parser). +* [onyphe](misp_modules/modules/expansion/onyphe.py) - a module to process queries on Onyphe. +* [onyphe_full](misp_modules/modules/expansion/onyphe_full.py) - a module to process full queries on Onyphe. +* [OTX](misp_modules/modules/expansion/otx.py) - an expansion module for [OTX](https://otx.alienvault.com/). +* [passivetotal](misp_modules/modules/expansion/passivetotal.py) - a [passivetotal](https://www.passivetotal.org/) module that queries a number of different PassiveTotal datasets.
+* [pdf-enrich](misp_modules/modules/expansion/pdf-enrich.py) - an enrichment module to extract text from PDF into MISP (using free-text parser). +* [pptx-enrich](misp_modules/modules/expansion/pptx-enrich.py) - an enrichment module to get text out of PowerPoint document into MISP (using free-text parser). +* [qrcode](misp_modules/modules/expansion/qrcode.py) - a module to decode QR codes, barcodes and similar codes from an image and enrich with the decoded values. +* [rbl](misp_modules/modules/expansion/rbl.py) - a module to get RBL (Real-time Blackhole List) values from an attribute. +* [reversedns](misp_modules/modules/expansion/reversedns.py) - Simple Reverse DNS expansion service to resolve reverse DNS from MISP attributes. +* [securitytrails](misp_modules/modules/expansion/securitytrails.py) - an expansion module for [securitytrails](https://securitytrails.com/). +* [shodan](misp_modules/modules/expansion/shodan.py) - a minimal [shodan](https://www.shodan.io/) expansion module. +* [Sigma queries](misp_modules/modules/expansion/sigma_queries.py) - Experimental expansion module querying a sigma rule to convert it into all the available SIEM signatures. +* [Sigma syntax validator](misp_modules/modules/expansion/sigma_syntax_validator.py) - Sigma syntax validator. +* [sourcecache](misp_modules/modules/expansion/sourcecache.py) - a module to cache a specific link from a MISP instance. +* [STIX2 pattern syntax validator](misp_modules/modules/expansion/stix2_pattern_syntax_validator.py) - a module to check a STIX2 pattern syntax. +* [ThreatCrowd](misp_modules/modules/expansion/threatcrowd.py) - an expansion module for [ThreatCrowd](https://www.threatcrowd.org/). +* [threatminer](misp_modules/modules/expansion/threatminer.py) - an expansion module to expand from [ThreatMiner](https://www.threatminer.org/). +* [urlhaus](misp_modules/modules/expansion/urlhaus.py) - Query urlhaus to get additional data about a domain, hash, hostname, ip or url.
+* [urlscan](misp_modules/modules/expansion/urlscan.py) - an expansion module to query [urlscan.io](https://urlscan.io). +* [virustotal](misp_modules/modules/expansion/virustotal.py) - an expansion module to query the [VirusTotal](https://www.virustotal.com/gui/home) API with a high request rate limit required. (More details about the API: [here](https://developers.virustotal.com/reference)) +* [virustotal_public](misp_modules/modules/expansion/virustotal_public.py) - an expansion module to query the [VirusTotal](https://www.virustotal.com/gui/home) API with a public key and a low request rate limit. (More details about the API: [here](https://developers.virustotal.com/reference)) +* [VMray](misp_modules/modules/expansion/vmray_submit.py) - a module to submit a sample to VMray. +* [VulnDB](misp_modules/modules/expansion/vulndb.py) - a module to query [VulnDB](https://www.riskbasedsecurity.com/). +* [Vulners](misp_modules/modules/expansion/vulners.py) - an expansion module to expand information about CVEs using Vulners API. +* [whois](misp_modules/modules/expansion/whois.py) - a module to query a local instance of [uwhois](https://github.com/rafiot/uwhoisd). +* [wikidata](misp_modules/modules/expansion/wiki.py) - a [wikidata](https://www.wikidata.org) expansion module. +* [xforce](misp_modules/modules/expansion/xforceexchange.py) - an IBM X-Force Exchange expansion module. +* [xlsx-enrich](misp_modules/modules/expansion/xlsx-enrich.py) - an enrichment module to get text out of an Excel document into MISP (using free-text parser). +* [YARA query](misp_modules/modules/expansion/yara_query.py) - a module to create YARA rules from single hash attributes. +* [YARA syntax validator](misp_modules/modules/expansion/yara_syntax_validator.py) - YARA syntax validator. + +### Export modules + +* [CEF](misp_modules/modules/export_mod/cef_export.py) module to export Common Event Format (CEF). 
+* [Cisco FireSight Manager ACL rule](misp_modules/modules/export_mod/cisco_firesight_manager_ACL_rule_export.py) module to export as rule for the Cisco FireSight manager ACL. +* [GoAML export](misp_modules/modules/export_mod/goamlexport.py) module to export in [GoAML format](http://goaml.unodc.org/goaml/en/index.html). +* [Lite Export](misp_modules/modules/export_mod/liteexport.py) module to export a lite event. +* [PDF export](misp_modules/modules/export_mod/pdfexport.py) module to export an event in PDF. +* [Nexthink query format](misp_modules/modules/export_mod/nexthinkexport.py) module to export in Nexthink query format. +* [osquery](misp_modules/modules/export_mod/osqueryexport.py) module to export in [osquery](https://osquery.io/) query format. +* [ThreatConnect](misp_modules/modules/export_mod/threat_connect_export.py) module to export in ThreatConnect CSV format. +* [ThreatStream](misp_modules/modules/export_mod/threatStream_misp_export.py) module to export in ThreatStream format. + +### Import modules + +* [CSV import](misp_modules/modules/import_mod/csvimport.py) Customizable CSV import module. +* [Cuckoo JSON](misp_modules/modules/import_mod/cuckooimport.py) Cuckoo JSON import. +* [Email Import](misp_modules/modules/import_mod/email_import.py) Email import module for MISP to import basic metadata. +* [GoAML import](misp_modules/modules/import_mod/goamlimport.py) Module to import [GoAML](http://goaml.unodc.org/goaml/en/index.html) XML format. +* [Joe Sandbox import](misp_modules/modules/import_mod/joe_import.py) Parse data from a Joe Sandbox json report. +* [OCR](misp_modules/modules/import_mod/ocr.py) Optical Character Recognition (OCR) module for MISP to import attributes from images, scan or faxes. +* [OpenIOC](misp_modules/modules/import_mod/openiocimport.py) OpenIOC import based on PyMISP library. 
+* [ThreatAnalyzer](misp_modules/modules/import_mod/threatanalyzer_import.py) - An import module to process ThreatAnalyzer archive.zip/analysis.json sandbox exports. +* [VMRay](misp_modules/modules/import_mod/vmray_import.py) - An import module to process VMRay export. + +## How to install and start MISP modules in a Python virtualenv? (recommended) + +~~~~bash +sudo apt-get install python3-dev python3-pip libpq5 libjpeg-dev tesseract-ocr libpoppler-cpp-dev imagemagick virtualenv libopencv-dev zbar-tools libzbar0 libzbar-dev libfuzzy-dev -y +sudo -u www-data virtualenv -p python3 /var/www/MISP/venv +cd /usr/local/src/ +sudo git clone https://github.com/MISP/misp-modules.git +cd misp-modules +sudo -u www-data /var/www/MISP/venv/bin/pip install -I -r REQUIREMENTS +sudo -u www-data /var/www/MISP/venv/bin/pip install . +# Start misp-modules as a service +sudo cp etc/systemd/system/misp-modules.service /etc/systemd/system/ +sudo systemctl daemon-reload +sudo systemctl enable --now misp-modules +/var/www/MISP/venv/bin/misp-modules -l 127.0.0.1 -s & #to start the modules +~~~~ + +## How to install and start MISP modules on RHEL-based distributions ? +As of this writing, the official RHEL repositories only contain Ruby 2.0.0 and Ruby 2.1 or higher is required. As such, this guide installs Ruby 2.2 from the [SCL](https://access.redhat.com/documentation/en-us/red_hat_software_collections/3/html/3.2_release_notes/chap-installation#sect-Installation-Subscribe) repository. 
+ +~~~~bash +sudo yum install rh-ruby22 +sudo yum install openjpeg-devel +sudo yum install rubygem-rouge rubygem-asciidoctor zbar-devel opencv-devel gcc-c++ pkgconfig poppler-cpp-devel python-devel redhat-rpm-config +cd /var/www/MISP +git clone https://github.com/MISP/misp-modules.git +cd misp-modules +sudo -u apache /usr/bin/scl enable rh-python36 "virtualenv -p python3 /var/www/MISP/venv" +sudo -u apache /var/www/MISP/venv/bin/pip install -U -I -r REQUIREMENTS +sudo -u apache /var/www/MISP/venv/bin/pip install -U . +~~~~ + +Create the service file /etc/systemd/system/misp-modules.service : +~~~~ +echo "[Unit] +Description=MISP's modules +After=misp-workers.service + +[Service] +Type=simple +User=apache +Group=apache +ExecStart=/usr/bin/scl enable rh-python36 rh-ruby22 '/var/www/MISP/venv/bin/misp-modules -l 127.0.0.1 -s' +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target" | sudo tee /etc/systemd/system/misp-modules.service +~~~~ + +The `After=misp-workers.service` must be changed or removed if you have not created a misp-workers service. +Then, enable the misp-modules service and start it: +~~~~bash +systemctl daemon-reload +systemctl enable --now misp-modules +~~~~ + +## How to add your own MISP modules? + +Create your module in [misp_modules/modules/expansion/](misp_modules/modules/expansion/), [misp_modules/modules/export_mod/](misp_modules/modules/export_mod/), or [misp_modules/modules/import_mod/](misp_modules/modules/import_mod/). The module should have at minimum three functions: + +* **introspection** function that returns a dict of the supported attributes (input and output) by your expansion module. +* **handler** function which accepts a JSON document to expand the values and return a dictionary of the expanded values. +* **version** function that returns a dict with the version and the associated meta-data including potential configurations required of the module.
+ +Don't forget to return an error key and value if an error is raised to propagate it to the MISP user-interface. + +Your module's script name should also be added in the `__all__` list of `/__init__.py` in order for it to be loaded. + +~~~python +... + # Checking for required value + if not request.get('ip-src'): + # Return an error message + return {'error': "A source IP is required"} +... +~~~ + + +### introspection + +The function that returns a dict of the supported attributes (input and output) by your expansion module. + +~~~python +mispattributes = {'input': ['link', 'url'], + 'output': ['attachment', 'malware-sample']} + +def introspection(): + return mispattributes +~~~ + +### version + +The function that returns a dict with the version and the associated meta-data including potential configurations required of the module. + + +### Additional Configuration Values + +If your module requires additional configuration (to be exposed via the MISP user-interface), you can define those in the moduleconfig value returned by the version function. + +~~~python +# config fields that your code expects from the site admin +moduleconfig = ["apikey", "event_limit"] + +def version(): + moduleinfo['config'] = moduleconfig + return moduleinfo +~~~ + + +When you do this a config array is added to the meta-data output containing all the potential configuration values: + +~~~ +"meta": { + "description": "PassiveTotal expansion service to expand values with multiple Passive DNS sources", + "config": [ + "username", + "password" + ], + "module-type": [ + "expansion", + "hover" + ], + +... +~~~ + + +If you want to use the configuration values set in the web interface they are stored in the key `config` in the JSON object passed to the handler. 
+ +~~~ +def handler(q=False): + + # Check if we were given a configuration + config = q.get("config", {}) + + # Find out if there is a username field + username = config.get("username", None) +~~~ + + +### handler + +The function which accepts a JSON document to expand the values and return a dictionary of the expanded values. + +~~~python +def handler(q=False): + "Fully functional rot-13 encoder" + if q is False: + return False + request = json.loads(q) + src = request.get('ip-src') + if src is None: + # Return an error message + return {'error': "A source IP is required"} + else: + return {'results': + codecs.encode(src, "rot-13")} +~~~ + +#### export module + +For an export module, the `request["data"]` object corresponds to a list of events (dictionaries) to handle. + +Iterating over events attributes is performed using their `Attribute` key. + +~~~python +... +for event in request["data"]: + for attribute in event["Attribute"]: + # do stuff w/ attribute['type'], attribute['value'], ... +... + +### Returning Binary Data + +If you want to return a file or other data you need to add a data attribute. + +~~~python +{"results": {"values": "filename.txt", + "types": "attachment", + "data" : base64.b64encode() # base64 encode your data first + "comment": "This is an attachment"}} +~~~ + +If the binary file is malware you can use 'malware-sample' as the type. If you do this the malware sample will be automatically zipped and password protected ('infected') after being uploaded. 
+ + +~~~python +{"results": {"values": "filename.txt", + "types": "malware-sample", + "data" : base64.b64encode() # base64 encode your data first + "comment": "This is an attachment"}} +~~~ + +[To learn more about how data attributes are processed you can read the processing code here.](https://github.com/MISP/PyMISP/blob/4f230c9299ad9d2d1c851148c629b61a94f3f117/pymisp/mispevent.py#L185-L200) + + +### Module type + +A MISP module can be of four types: + +- **expansion** - service related to an attribute that can be used to extend and update an existing event. +- **hover** - service related to an attribute to provide additional information to the users without updating the event. +- **import** - service related to importing and parsing an external object that can be used to extend an existing event. +- **export** - service related to exporting an object, event, or data. + +module-type is an array where the list of supported types can be added. + +## Testing your modules? + +MISP uses the **modules** function to discover the available MISP modules and their supported MISP attributes: + +~~~ +% curl -s http://127.0.0.1:6666/modules | jq . +[ + { + "name": "passivetotal", + "type": "expansion", + "mispattributes": { + "input": [ + "hostname", + "domain", + "ip-src", + "ip-dst" + ], + "output": [ + "ip-src", + "ip-dst", + "hostname", + "domain" + ] + }, + "meta": { + "description": "PassiveTotal expansion service to expand values with multiple Passive DNS sources", + "config": [ + "username", + "password" + ], + "author": "Alexandre Dulaunoy", + "version": "0.1" + } + }, + { + "name": "sourcecache", + "type": "expansion", + "mispattributes": { + "input": [ + "link" + ], + "output": [ + "link" + ] + }, + "meta": { + "description": "Module to cache web pages of analysis reports, OSINT sources. 
The module returns a link of the cached page.", + "author": "Alexandre Dulaunoy", + "version": "0.1" + } + }, + { + "name": "dns", + "type": "expansion", + "mispattributes": { + "input": [ + "hostname", + "domain" + ], + "output": [ + "ip-src", + "ip-dst" + ] + }, + "meta": { + "description": "Simple DNS expansion service to resolve IP address from MISP attributes", + "author": "Alexandre Dulaunoy", + "version": "0.1" + } + } +] + +~~~ + +The MISP module service returns the available modules in a JSON array containing each module name along with their supported input attributes. + +Based on this information, a query can be built in a JSON format and saved as body.json: + +~~~json +{ + "hostname": "www.foo.be", + "module": "dns" +} +~~~ + +Then you can POST this JSON format query towards the MISP object server: + +~~~bash +curl -s http://127.0.0.1:6666/query -H "Content-Type: application/json" --data @body.json -X POST +~~~ + +The module should output the following JSON: + +~~~json +{ + "results": [ + { + "types": [ + "ip-src", + "ip-dst" + ], + "values": [ + "188.65.217.78" + ] + } + ] +} +~~~ + +It is also possible to restrict the category options of the resolved attributes by passing a list of categories along (optional): + +~~~json +{ + "results": [ + { + "types": [ + "ip-src", + "ip-dst" + ], + "values": [ + "188.65.217.78" + ], + "categories": [ + "Network activity", + "Payload delivery" + ] + } + ] +} +~~~ + +For both the type and the category lists, the first item in the list will be the default setting on the interface. + +### Enable your module in the web interface + +For a module to be activated in the MISP web interface it must be enabled in the "Plugin Settings. + +Go to "Administration > Server Settings" in the top menu +- Go to "Plugin Settings" in the top "tab menu bar" +- Click on the name of the type of module you have created to expand the list of plugins to show your module. +- Find the name of your plugin's "enabled" value in the Setting Column. 
+"Plugin.[MODULE NAME]_enabled" +- Double click on its "Value" column + +~~~ +Priority Setting Value Description Error Message +Recommended Plugin.Import_ocr_enabled false Enable or disable the ocr module. Value not set. +~~~ + +- Use the drop-down to set the enabled value to 'true' + +~~~ +Priority Setting Value Description Error Message +Recommended Plugin.Import_ocr_enabled true Enable or disable the ocr module. Value not set. +~~~ + +### Set any other required settings for your module + +In this same menu set any other plugin settings that are required for testing. + +## Install misp-module on an offline instance. +First, you need to grab all necessary packages for example like this : + +Use pip wheel to create an archive +~~~ +mkdir misp-modules-offline +pip3 wheel -r REQUIREMENTS shodan --wheel-dir=./misp-modules-offline +tar -cjvf misp-module-bundeled.tar.bz2 ./misp-modules-offline/* +~~~ +On offline machine : +~~~ +mkdir misp-modules-bundle +tar xvf misp-module-bundeled.tar.bz2 -C misp-modules-bundle +cd misp-modules-bundle +ls -1|while read line; do sudo pip3 install --force-reinstall --ignore-installed --upgrade --no-index --no-deps ${line};done +~~~ +Next you can follow standard install procedure. + +## How to contribute your own module? + +Fork the project, add your module, test it and make a pull-request. Modules can be also private as you can add a module in your own MISP installation. + + +## Tips for developers creating modules + +Download a pre-built virtual image from the [MISP training materials](https://www.circl.lu/services/misp-training-materials/). + +- Create a Host-Only adapter in VirtualBox +- Set your Misp OVA to that Host-Only adapter +- Start the virtual machine +- Get the IP address of the virtual machine +- SSH into the machine (Login info on training page) +- Go into the misp-modules directory + +~~~bash +cd /usr/local/src/misp-modules +~~~ + +Set the git repo to your fork and checkout your development branch. 
If you SSH'ed in as the misp user you will have to use sudo. + +~~~bash +sudo git remote set-url origin https://github.com/YourRepo/misp-modules.git +sudo git pull +sudo git checkout MyModBranch +~~~ + +Remove the contents of the build directory and re-install misp-modules. + +~~~bash +sudo rm -fr build/* +sudo -u www-data /var/www/MISP/venv/bin/pip install --upgrade . +~~~ + +SSH in with a different terminal and run `misp-modules` with debugging enabled. + +~~~bash +# In case misp-modules is not a service do: +# sudo killall misp-modules +sudo systemctl disable --now misp-modules +sudo -u www-data /var/www/MISP/venv/bin/misp-modules -d +~~~ + + +In your original terminal you can now run your tests manually and see any errors that arise + +~~~bash +cd tests/ +curl -s http://127.0.0.1:6666/query -H "Content-Type: application/json" --data @MY_TEST_FILE.json -X POST +cd ../ +~~~ ## Documentation -The new documentation can found [here](https://misp.github.io/misp-modules). +In order to provide documentation about some modules that require specific input / output / configuration, the [doc](doc) directory contains detailed information about the general purpose, requirements, features, input and output of each of these modules: - -## License -[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2F8ear%2Fmisp-modules.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2F8ear%2Fmisp-modules?ref=badge_large) +- **description** - quick description of the general purpose of the module, as the one given by the moduleinfo +- **requirements** - special libraries needed to make the module work +- **features** - description of the way to use the module, with the required MISP features to make the module give the intended result +- **references** - link(s) giving additional information about the format concerned in the module +- **input** - description of the format of data used in input +- **output** - description of the format given as the result of the
module execution diff --git a/REQUIREMENTS b/REQUIREMENTS index 1a9c146..6f6a068 100644 --- a/REQUIREMENTS +++ b/REQUIREMENTS @@ -3,7 +3,7 @@ -e git+https://github.com/D4-project/BGP-Ranking.git/@429cea9c0787876820984a2df4e982449a84c10e#egg=pybgpranking&subdirectory=client -e git+https://github.com/D4-project/IPASN-History.git/@47cd0f2658ab172fce42126ff3a1dbcddfb0b5fb#egg=pyipasnhistory&subdirectory=client -e git+https://github.com/MISP/PyIntel471.git@0df8d51f1c1425de66714b3a5a45edb69b8cc2fc#egg=pyintel471 --e git+https://github.com/MISP/PyMISP.git@583fb6592495ea358aad47a8a1ec92d43c13348a#egg=pymisp +-e git+https://github.com/MISP/PyMISP.git@3ad351380055f0a655ed529b9c79b242a9227b84#egg=pymisp -e git+https://github.com/Rafiot/uwhoisd.git@411572840eba4c72dc321c549b36a54ed5cea9de#egg=uwhois&subdirectory=client -e git+https://github.com/cartertemm/ODTReader.git/@49d6938693f6faa3ff09998f86dba551ae3a996b#egg=odtreader -e git+https://github.com/sebdraven/pydnstrails@48c1f740025c51289f43a24863d1845ff12fd21a#egg=pydnstrails diff --git a/docs/contribute.md b/docs/contribute.md index 4eea441..ef312f6 100644 --- a/docs/contribute.md +++ b/docs/contribute.md @@ -322,8 +322,9 @@ In order to provide documentation about some modules that require specific input - **input** - description of the format of data used in input - **output** - description of the format given as the result of the module execution -In addition to the modul documentation please add your module to [docs/index.md](https://github.com/MISP/misp-modules/tree/master/docs/index.md). +In addition to the module documentation please add your module to [docs/index.md](https://github.com/MISP/misp-modules/tree/master/docs/index.md). +There are also [complementary slides](https://www.misp-project.org/misp-training/3.1-misp-modules.pdf) for the creation of MISP modules. 
## Tips for developers creating modules diff --git a/misp_modules/lib/joe_parser.py b/misp_modules/lib/joe_parser.py index ccbfb7c..00aa868 100644 --- a/misp_modules/lib/joe_parser.py +++ b/misp_modules/lib/joe_parser.py @@ -405,7 +405,7 @@ class JoeParser(): def finalize_results(self): if self.references: self.build_references() - event = json.loads(self.misp_event.to_json())['Event'] + event = json.loads(self.misp_event.to_json()) self.results = {key: event[key] for key in ('Attribute', 'Object', 'Tag') if (key in event and event[key])} @staticmethod diff --git a/misp_modules/modules/expansion/cve_advanced.py b/misp_modules/modules/expansion/cve_advanced.py index 62c49e2..b823761 100644 --- a/misp_modules/modules/expansion/cve_advanced.py +++ b/misp_modules/modules/expansion/cve_advanced.py @@ -1,3 +1,4 @@ +from collections import defaultdict from pymisp import MISPEvent, MISPObject import json import requests @@ -12,17 +13,25 @@ cveapi_url = 'https://cve.circl.lu/api/cve/' class VulnerabilityParser(): - def __init__(self, vulnerability): + def __init__(self, attribute, vulnerability): + self.attribute = attribute self.vulnerability = vulnerability self.misp_event = MISPEvent() + self.misp_event.add_attribute(**attribute) + self.references = defaultdict(list) + self.capec_features = ('id', 'name', 'summary', 'prerequisites', 'solutions') self.vulnerability_mapping = { 'id': ('text', 'id'), 'summary': ('text', 'summary'), 'vulnerable_configuration_cpe_2_2': ('text', 'vulnerable_configuration'), 'Modified': ('datetime', 'modified'), 'Published': ('datetime', 'published'), 'references': ('link', 'references'), 'cvss': ('float', 'cvss-score')} + self.weakness_mapping = {'name': 'name', 'description_summary': 'description', + 'status': 'status', 'weaknessabs': 'weakness-abs'} def get_result(self): - event = json.loads(self.misp_event.to_json())['Event'] + if self.references: + self.__build_references() + event = json.loads(self.misp_event.to_json()) results = {key: 
event[key] for key in ('Attribute', 'Object') if (key in event and event[key])} return {'results': results} @@ -41,7 +50,50 @@ class VulnerabilityParser(): attribute_type, relation = self.vulnerability_mapping[feature] for value in self.vulnerability[feature]: vulnerability_object.add_attribute(relation, **{'type': attribute_type, 'value': value}) + vulnerability_object.add_reference(self.attribute['uuid'], 'related-to') self.misp_event.add_object(**vulnerability_object) + if 'cwe' in self.vulnerability and self.vulnerability['cwe'] != 'Unknown': + self.__parse_weakness(vulnerability_object.uuid) + if 'capec' in self.vulnerability: + self.__parse_capec(vulnerability_object.uuid) + + def __build_references(self): + for object_uuid, references in self.references.items(): + for misp_object in self.misp_event.objects: + if misp_object.uuid == object_uuid: + for reference in references: + misp_object.add_reference(**reference) + break + + def __parse_capec(self, vulnerability_uuid): + attribute_type = 'text' + for capec in self.vulnerability['capec']: + capec_object = MISPObject('attack-pattern') + for feature in self.capec_features: + capec_object.add_attribute(feature, **dict(type=attribute_type, value=capec[feature])) + for related_weakness in capec['related_weakness']: + attribute = dict(type='weakness', value="CWE-{}".format(related_weakness)) + capec_object.add_attribute('related-weakness', **attribute) + self.misp_event.add_object(**capec_object) + self.references[vulnerability_uuid].append(dict(referenced_uuid=capec_object.uuid, + relationship_type='targeted-by')) + + def __parse_weakness(self, vulnerability_uuid): + attribute_type = 'text' + cwe_string, cwe_id = self.vulnerability['cwe'].split('-') + cwes = requests.get(cveapi_url.replace('/cve/', '/cwe')) + if cwes.status_code == 200: + for cwe in cwes.json(): + if cwe['id'] == cwe_id: + weakness_object = MISPObject('weakness') + weakness_object.add_attribute('id', **dict(type=attribute_type, 
value='-'.join([cwe_string, cwe_id]))) + for feature, relation in self.weakness_mapping.items(): + if cwe.get(feature): + weakness_object.add_attribute(relation, **dict(type=attribute_type, value=cwe[feature])) + self.misp_event.add_object(**weakness_object) + self.references[vulnerability_uuid].append(dict(referenced_uuid=weakness_object.uuid, + relationship_type='weakened-by')) + break def handler(q=False): @@ -61,7 +113,7 @@ def handler(q=False): else: misperrors['error'] = 'cve.circl.lu API not accessible' return misperrors['error'] - parser = VulnerabilityParser(vulnerability) + parser = VulnerabilityParser(attribute, vulnerability) parser.parse_vulnerability_information() return parser.get_result() diff --git a/misp_modules/modules/expansion/urlhaus.py b/misp_modules/modules/expansion/urlhaus.py index 64d7527..21a3718 100644 --- a/misp_modules/modules/expansion/urlhaus.py +++ b/misp_modules/modules/expansion/urlhaus.py @@ -31,7 +31,7 @@ class URLhaus(): return vt_object def get_result(self): - event = json.loads(self.misp_event.to_json())['Event'] + event = json.loads(self.misp_event.to_json()) results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])} return {'results': results} diff --git a/misp_modules/modules/expansion/virustotal.py b/misp_modules/modules/expansion/virustotal.py index 9660b5f..c6263fc 100644 --- a/misp_modules/modules/expansion/virustotal.py +++ b/misp_modules/modules/expansion/virustotal.py @@ -35,7 +35,7 @@ class VirusTotalParser(object): return self.input_types_mapping[self.attribute.type](self.attribute.value, recurse=True) def get_result(self): - event = json.loads(self.misp_event.to_json())['Event'] + event = json.loads(self.misp_event.to_json()) results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])} return {'results': results} diff --git a/misp_modules/modules/expansion/virustotal_public.py b/misp_modules/modules/expansion/virustotal_public.py index 
a614a8c..7074826 100644 --- a/misp_modules/modules/expansion/virustotal_public.py +++ b/misp_modules/modules/expansion/virustotal_public.py @@ -23,7 +23,7 @@ class VirusTotalParser(): self.apikey = apikey def get_result(self): - event = json.loads(self.misp_event.to_json())['Event'] + event = json.loads(self.misp_event.to_json()) results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])} return {'results': results} diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 5d7408c..adce34a 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -194,7 +194,7 @@ class CsvParser(): return list2pop, misp, list(reversed(head)) def finalize_results(self): - event = json.loads(self.misp_event.to_json())['Event'] + event = json.loads(self.misp_event.to_json()) self.results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])} diff --git a/misp_modules/modules/import_mod/cuckooimport.py b/misp_modules/modules/import_mod/cuckooimport.py index fc0c30e..3ed52bd 100755 --- a/misp_modules/modules/import_mod/cuckooimport.py +++ b/misp_modules/modules/import_mod/cuckooimport.py @@ -1,198 +1,744 @@ import json import base64 +import io +import logging +import posixpath +import stat +import tarfile +import zipfile +from pymisp import MISPEvent, MISPObject, MISPAttribute +from pymisp.tools import make_binary_objects +from collections import OrderedDict + +log = logging.getLogger(__name__) misperrors = {'error': 'Error'} -userConfig = {} -inputSource = ['file'] -moduleinfo = {'version': '0.1', 'author': 'Victor van der Stoep', - 'description': 'Cuckoo JSON import', - 'module-type': ['import']} +moduleinfo = { + 'version': '1.1', + 'author': 'Pierre-Jean Grenier', + 'description': "Import a Cuckoo archive (zipfile or bzip2 tarball), " + "either downloaded manually or exported from the " + "API 
(/tasks/report/{task_id}/all).", + 'module-type': ['import'], +} moduleconfig = [] +mispattributes = { + 'inputSource': ['file'], + 'output': ['MISP objects', 'malware-sample'], + 'format': 'misp_standard', +} + +# Attributes for which we can set the "Artifacts dropped" +# category if we want to +ARTIFACTS_DROPPED = ( + "filename", + "md5", + "sha1", + "sha256", + "sha512", + "malware-sample", + "mimetype", + "ssdeep", +) + +# Same for the category "Payload delivery" +PAYLOAD_DELIVERY = ARTIFACTS_DROPPED + + +class PrettyDict(OrderedDict): + """ + This class is just intended for a pretty print + of its keys and values. + """ + MAX_SIZE = 30 + + def __str__(self): + tmp = [] + for k, v in self.items(): + v = str(v) + if len(v) > self.MAX_SIZE: + k += ',cut' + v = v[:self.MAX_SIZE] + v.replace('\n', ' ') + tmp.append((k, v)) + return "; ".join(f"({k}) {v}" for k, v in tmp) + + +def search_objects(event, name, attributes=[]): + """ + Search for objects in event, which name is `name` and + contain at least the attributes given. + Return a generator. + @ param attributes: a list of (object_relation, value) + """ + match = filter( + lambda obj: all( + obj.name == name + and (obj_relation, str(attr_value)) in map( + lambda attr: (attr.object_relation, str(attr.value)), + obj.attributes + ) + for obj_relation, attr_value in attributes + ), event.objects + ) + return match + + +def find_process_by_pid(event, pid): + """ + Find a 'process' MISPObject by its PID. If multiple objects are found, + only return the first one. + @ param pid: integer or str + """ + generator = search_objects( + event, + "process", + (('pid', pid),) + ) + return next(generator, None) + + +class CuckooParser(): + # This dict is used to generate the userConfig and link the different + # options to the corresponding method of the parser. 
This way, we avoid + # redundancy and make future changes easier (instead of for instance + # defining all the options in userConfig directly, and then making a + # switch when running the parser). + # Careful about the order here, as we create references between + # MISPObjects/MISPAttributes at the same time we generate them. + # Hence when we create object B, which we want to reference to + # object A, we should already have created object A. + # TODO create references only after all parsing is done + options = { + "Sandbox info": { + "method": lambda self: self.add_sandbox_info(), + "userConfig": { + 'type': 'Boolean', + 'message': "Add info related to the sandbox", + 'checked': 'true', + }, + }, + "Upload sample": { + "method": lambda self: self.add_sample(), + "userConfig": { + 'type': 'Boolean', + 'message': "Upload the sample", + 'checked': 'true', + }, + }, + "Processes": { + "method": lambda self: self.add_process_tree(), + "userConfig": { + 'type': 'Boolean', + 'message': "Add info related to the processes", + 'checked': 'true', + }, + }, + "DNS": { + "method": lambda self: self.add_dns(), + "userConfig": { + 'type': 'Boolean', + 'message': "Add DNS queries/answers", + 'checked': 'true', + }, + }, + "TCP": { + "method": lambda self: self.add_network("tcp"), + "userConfig": { + 'type': 'Boolean', + 'message': "Add TCP connections", + 'checked': 'true', + }, + }, + "UDP": { + "method": lambda self: self.add_network("udp"), + "userConfig": { + 'type': 'Boolean', + 'message': "Add UDP connections", + 'checked': 'true', + }, + }, + "HTTP": { + "method": lambda self: self.add_http(), + "userConfig": { + 'type': 'Boolean', + 'message': "Add HTTP requests", + 'checked': 'true', + }, + }, + "Signatures": { + "method": lambda self: self.add_signatures(), + "userConfig": { + 'type': 'Boolean', + 'message': "Add Cuckoo's triggered signatures", + 'checked': 'true', + }, + }, + "Screenshots": { + "method": lambda self: self.add_screenshots(), + "userConfig": { + 
'type': 'Boolean', + 'message': "Upload the screenshots", + 'checked': 'true', + }, + }, + "Dropped files": { + "method": lambda self: self.add_dropped_files(), + "userConfig": { + 'type': 'Boolean', + 'message': "Upload the dropped files", + 'checked': 'true', + }, + }, + "Dropped buffers": { + "method": lambda self: self.add_dropped_buffers(), + "userConfig": { + 'type': 'Boolean', + 'message': "Upload the dropped buffers", + 'checked': 'true', + }, + }, + } + + def __init__(self, config): + self.event = MISPEvent() + self.files = None + self.malware_binary = None + self.report = None + self.config = { + # if an option is missing (we receive None as a value), + # fall back to the default specified in the options + key: int( + on if on is not None + else self.options[key]["userConfig"]["checked"] == 'true' + ) + for key, on in config.items() + } + + def get_file(self, relative_filepath): + """Return an io.BufferedIOBase for the corresponding relative_filepath + in the Cuckoo archive. If not found, return an empty io.BufferedReader + to avoid fatal errors.""" + blackhole = io.BufferedReader(open('/dev/null', 'rb')) + res = self.files.get(relative_filepath, blackhole) + if res == blackhole: + log.debug(f"Did not find file {relative_filepath}, " + f"returned an empty file instead") + return res + + def read_archive(self, archive_encoded): + """Read the archive exported from Cuckoo and initialize the class""" + # archive_encoded is base 64 encoded content + # we extract the info about each file but do not retrieve + # it automatically, as it may take too much space in memory + buf_io = io.BytesIO(base64.b64decode(archive_encoded)) + if zipfile.is_zipfile(buf_io): + # the archive was probably downloaded from the WebUI + buf_io.seek(0) # don't forget this not to read an empty buffer + z = zipfile.ZipFile(buf_io, 'r') + self.files = { + info.filename: z.open(info) + for info in z.filelist + # only extract the regular files and dirs, we don't + # want any symbolic link + 
if stat.S_ISREG(info.external_attr >> 16) + or stat.S_ISDIR(info.external_attr >> 16) + } + else: + # the archive was probably downloaded from the API + buf_io.seek(0) # don't forget this not to read an empty buffer + f = tarfile.open(fileobj=buf_io, mode='r:bz2') + self.files = { + info.name: f.extractfile(info) + for info in f.getmembers() + # only extract the regular files and dirs, we don't + # want any symbolic link + if info.isreg() or info.isdir() + } + + # We want to keep the order of the keys of sub-dicts in the report, + # eg. the signatures have marks with unknown keys such as + # {'marks': [ + # {"suspicious_features": "Connection to IP address", + # "suspicious_request": "OPTIONS http://85.20.18.18/doc"} + # ]} + # To render those marks properly, we can only hope the developpers + # thought about the order in which they put the keys, and keep this + # order so that the signature makes sense to the reader. + # We use PrettyDict, a customization of OrderedDict to do so. + # It will be instanced iteratively when parsing the json (ie. 
subdicts + # will also be instanced as PrettyDict) + self.report = json.load( + self.get_file("reports/report.json"), + object_pairs_hook=PrettyDict, + ) + + def read_malware(self): + self.malware_binary = self.get_file("binary").read() + if not self.malware_binary: + log.warn("No malware binary found") + + def add_sandbox_info(self): + info = self.report.get("info", {}) + if not info: + log.warning("The 'info' field was not found " + "in the report, skipping") + return False + + o = MISPObject(name='sandbox-report') + o.add_attribute('score', info['score']) + o.add_attribute('sandbox-type', 'on-premise') + o.add_attribute('on-premise-sandbox', 'cuckoo') + o.add_attribute('raw-report', + f'started on:{info["machine"]["started_on"]} ' + f'duration:{info["duration"]}s ' + f'vm:{info["machine"]["name"]}/' + f'{info["machine"]["label"]}') + self.event.add_object(o) + + def add_sample(self): + """Add the sample/target of the analysis""" + target = self.report.get("target", {}) + category = target.get("category", "") + if not category: + log.warning("Could not find info about the sample " + "in the report, skipping") + return False + + if category == "file": + log.debug("Sample is a file, uploading it") + self.read_malware() + file_o, bin_type_o, bin_section_li = make_binary_objects( + pseudofile=io.BytesIO(self.malware_binary), + filename=target["file"]["name"], + ) + + file_o.comment = "Submitted sample" + # fix categories + for obj in filter(None, (file_o, bin_type_o, *bin_section_li,)): + for attr in obj.attributes: + if attr.type in PAYLOAD_DELIVERY: + attr.category = "Payload delivery" + self.event.add_object(obj) + + elif category == "url": + log.debug("Sample is a URL") + o = MISPObject(name='url') + o.add_attribute('url', target['url']) + o.add_attribute('text', "Submitted URL") + self.event.add_object(o) + + def add_http(self): + """Add the HTTP requests""" + network = self.report.get("network", []) + http = network.get("http", []) + if not http: + log.info("No 
HTTP connection found in the report, skipping") + return False + + for request in http: + o = MISPObject(name='http-request') + o.add_attribute('host', request['host']) + o.add_attribute('method', request['method']) + o.add_attribute('uri', request['uri']) + o.add_attribute('user-agent', request['user-agent']) + o.add_attribute('text', f"count:{request['count']} " + f"port:{request['port']}") + self.event.add_object(o) + + def add_network(self, proto=None): + """ + Add UDP/TCP traffic + proto must be one of "tcp", "udp" + """ + network = self.report.get("network", []) + li_conn = network.get(proto, []) + if not li_conn: + log.info(f"No {proto} connection found in the report, skipping") + return False + + from_to = [] + # sort by time to get the "first packet seen" right + li_conn.sort(key=lambda x: x["time"]) + for conn in li_conn: + src = conn['src'] + dst = conn['dst'] + sport = conn['sport'] + dport = conn['dport'] + if (src, sport, dst, dport) in from_to: + continue + + from_to.append((src, sport, dst, dport)) + + o = MISPObject(name='network-connection') + o.add_attribute('ip-src', src) + o.add_attribute('ip-dst', dst) + o.add_attribute('src-port', sport) + o.add_attribute('dst-port', dport) + o.add_attribute('layer3-protocol', "IP") + o.add_attribute('layer4-protocol', proto.upper()) + o.add_attribute('first-packet-seen', conn['time']) + self.event.add_object(o) + + def add_dns(self): + """Add DNS records""" + network = self.report.get("network", []) + dns = network.get("dns", []) + if not dns: + log.info("No DNS connection found in the report, skipping") + return False + + for record in dns: + o = MISPObject(name='dns-record') + o.add_attribute('text', f"request type:{record['type']}") + o.add_attribute('queried-domain', record['request']) + for answer in record.get("answers", []): + if answer["type"] in ("A", "AAAA"): + o.add_attribute('a-record', answer['data']) + # TODO implement MX/NS + + self.event.add_object(o) + + def _get_marks_str(self, marks): + 
marks_strings = [] + for m in marks: + m_type = m.pop("type") # temporarily remove the type + + if m_type == "generic": + marks_strings.append(str(m)) + + elif m_type == "ioc": + marks_strings.append(m['ioc']) + + elif m_type == "call": + call = m["call"] + arguments = call.get("arguments", {}) + flags = call.get("flags", {}) + info = "" + for details in (arguments, flags): + info += f" {details}" + marks_strings.append(f"Call API '{call['api']}'%s" % info) + + else: + logging.debug(f"Unknown mark type '{m_type}', skipping") + + m["type"] = m_type # restore key 'type' + # TODO implemented marks 'config' and 'volatility' + return marks_strings + + def _add_ttp(self, attribute, ttp_short, ttp_num): + """ + Internal wrapper to add the TTP tag from the MITRE galaxy. + @ params + - attribute: MISPAttribute + - ttp_short: short description of the TTP + (eg. "Credential Dumping") + - ttp_num: formatted as "T"+int + (eg. T1003) + """ + attribute.add_tag(f'misp-galaxy:mitre-attack-pattern=' + f'"{ttp_short} - {ttp_num}"') + + def add_signatures(self): + """Add the Cuckoo signatures, with as many details as possible + regarding the marks""" + signatures = self.report.get("signatures", []) + if not signatures: + log.info("No signature found in the report") + return False + + o = MISPObject(name='sb-signature') + o.add_attribute('software', "Cuckoo") + + for sign in signatures: + marks = sign["marks"] + marks_strings = self._get_marks_str(marks) + summary = sign['description'] + if marks_strings: + summary += "\n---\n" + + marks_strings = set(marks_strings) + description = summary + "\n".join(marks_strings) + + a = MISPAttribute() + a.from_dict(type='text', value=description) + for ttp_num, desc in sign.get("ttp", {}).items(): + ttp_short = desc["short"] + self._add_ttp(a, ttp_short, ttp_num) + + # this signature was triggered by the processes with the following + # PIDs, we can create references + triggered_by_pids = filter( + None, + (m.get("pid", None) for m in marks) + ) + 
# remove redundancy + triggered_by_pids = set(triggered_by_pids) + for pid in triggered_by_pids: + process_o = find_process_by_pid(self.event, pid) + if process_o: + process_o.add_reference(a, "triggers") + + o.add_attribute('signature', **a) + + self.event.add_object(o) + + def _handle_process(self, proc, accu): + """ + This is an internal recursive function to handle one process + from a process tree and then iterate on its children. + List the objects to be added, based on the tree, into the `accu` list. + The `accu` list uses a DFS-like order. + """ + o = MISPObject(name='process') + accu.append(o) + o.add_attribute('pid', proc['pid']) + o.add_attribute('command-line', proc['command_line']) + o.add_attribute('name', proc['process_name']) + o.add_attribute('parent-pid', proc['ppid']) + for child in proc.get('children', []): + pos_child = len(accu) + o.add_attribute('child-pid', child['pid']) + self._handle_process(child, accu) + child_obj = accu[pos_child] + child_obj.add_reference(o, 'child-of') + + return o + + def add_process_tree(self): + """Add process tree from the report, as separated process objects""" + behavior = self.report.get("behavior", {}) + tree = behavior.get("processtree", []) + if not tree: + log.warning("No process tree found in the report, skipping") + return False + + for proc in tree: + objs = [] + self._handle_process(proc, objs) + for o in objs: + self.event.add_object(o) + + def get_relpath(self, path): + """ + Transform an absolute or relative path into a path relative to the + correct cuckoo analysis directory, without knowing the cuckoo + working directory. + Return an empty string if the path given does not refer to a + file from the analysis directory. 
+ """ + head, tail = posixpath.split(path) + if not tail: + return "" + prev = self.get_relpath(head) + longer = posixpath.join(prev, tail) + if longer in self.files: + return longer + elif tail in self.files: + return tail + else: + return "" + + def add_screenshots(self): + """Add the screenshots taken by Cuckoo in a sandbox-report object""" + screenshots = self.report.get('screenshots', []) + if not screenshots: + log.info("No screenshot found in the report, skipping") + return False + + o = MISPObject(name='sandbox-report') + o.add_attribute('sandbox-type', 'on-premise') + o.add_attribute('on-premise-sandbox', "cuckoo") + for shot in screenshots: + # The path given by Cuckoo is an absolute path, but we need a path + # relative to the analysis folder. + path = self.get_relpath(shot['path']) + img = self.get_file(path) + # .decode('utf-8') in order to avoid the b'' format + img_data = base64.b64encode(img.read()).decode('utf-8') + filename = posixpath.basename(path) + + o.add_attribute( + "sandbox-file", value=filename, + data=img_data, type='attachment', + category="External analysis", + ) + + self.event.add_object(o) + + def _get_dropped_objs(self, path, filename=None, comment=None): + """ + Internal wrapper to get dropped files/buffers as file objects + @ params + - path: relative to the cuckoo analysis directory + - filename: if not specified, deduced from the path + """ + if not filename: + filename = posixpath.basename(path) + + dropped_file = self.get_file(path) + dropped_binary = io.BytesIO(dropped_file.read()) + # create ad hoc objects + file_o, bin_type_o, bin_section_li = make_binary_objects( + pseudofile=dropped_binary, filename=filename, + ) + + if comment: + file_o.comment = comment + # fix categories + for obj in filter(None, (file_o, bin_type_o, *bin_section_li,)): + for attr in obj.attributes: + if attr.type in ARTIFACTS_DROPPED: + attr.category = "Artifacts dropped" + + return file_o, bin_type_o, bin_section_li + + def _add_yara(self, obj, 
yara_dict): + """Internal wrapper to add Yara matches to an MISPObject""" + for yara in yara_dict: + description = yara.get("meta", {}).get("description", "") + name = yara.get("name", "") + obj.add_attribute( + "text", + f"Yara match\n(name) {name}\n(description) {description}", + comment="Yara match" + ) + + def add_dropped_files(self): + """Upload the dropped files as file objects""" + dropped = self.report.get("dropped", []) + if not dropped: + log.info("No dropped file found, skipping") + return False + + for d in dropped: + # Cuckoo logs three things that are of interest for us: + # - 'filename' which is not the original name of the file + # but is formatted as follow: + # 8 first bytes of SHA265 + _ + original name in lower case + # - 'filepath' which is the original filepath on the VM, + # where the file was dropped + # - 'path' which is the local path of the stored file, + # in the cuckoo archive + filename = d.get("name", "") + original_path = d.get("filepath", "") + sha256 = d.get("sha256", "") + if original_path and sha256: + log.debug(f"Will now try to restore original filename from " + f"path {original_path}") + try: + s = filename.split("_") + if not s: + raise Exception("unexpected filename read " + "in the report") + sha256_first_8_bytes = s[0] + original_name = s[1] + # check our assumptions are valid, if so we can safely + # restore the filename, if not the format may have changed + # so we'll keep the filename of the report + if sha256.startswith(sha256_first_8_bytes) and \ + original_path.lower().endswith(original_name) and \ + filename not in original_path.lower(): + # we can restore the original case of the filename + position = original_path.lower().rindex(original_name) + filename = original_path[position:] + log.debug(f"Successfully restored original filename: " + f"{filename}") + else: + raise Exception("our assumptions were wrong, " + "filename format may have changed") + except Exception as e: + log.debug(f"Cannot restore filename: 
{e}") + + if not filename: + filename = "NO NAME FOUND IN THE REPORT" + log.warning(f'No filename found for dropped file! ' + f'Will use "{filename}"') + + file_o, bin_type_o, bin_section_o = self._get_dropped_objs( + self.get_relpath(d['path']), + filename=filename, + comment="Dropped file" + ) + + self._add_yara(file_o, d.get("yara", [])) + + file_o.add_attribute("fullpath", original_path, + category="Artifacts dropped") + + # why is this a list? for when various programs drop the same file? + for pid in d.get("pids", []): + # if we have an object for the process that dropped the file, + # we can link the two (we just take the first result from + # the search) + process_o = find_process_by_pid(self.event, pid) + if process_o: + file_o.add_reference(process_o, "dropped-by") + + self.event.add_object(file_o) + + def add_dropped_buffers(self): + """"Upload the dropped buffers as file objects""" + buffer = self.report.get("buffer", []) + if not buffer: + log.info("No dropped buffer found, skipping") + return False + + for i, buf in enumerate(buffer): + file_o, bin_type_o, bin_section_o = self._get_dropped_objs( + self.get_relpath(buf['path']), + filename=f"buffer {i}", + comment="Dropped buffer" + ) + self._add_yara(file_o, buf.get("yara", [])) + self.event.add_object(file_o) + + def parse(self): + """Run the parsing""" + for name, active in self.config.items(): + if active: + self.options[name]["method"](self) + + def get_misp_event(self): + log.debug("Running MISP expansions") + self.event.run_expansions() + return self.event + def handler(q=False): - # Just in case we have no data + # In case there's no data if q is False: return False - # The return value - r = {'results': []} - - # Load up that JSON q = json.loads(q) - data = base64.b64decode(q.get("data")).decode('utf-8') + data = q['data'] - # If something really weird happened - if not data: - return json.dumps({"success": 0}) + parser = CuckooParser(q['config']) + parser.read_archive(data) + parser.parse() + 
event = parser.get_misp_event() - data = json.loads(data) - - # Get characteristics of file - targetFile = data['target']['file'] - - # Process the inital binary - processBinary(r, targetFile, initial=True) - - # Get binary information for dropped files - if(data.get('dropped')): - for droppedFile in data['dropped']: - processBinary(r, droppedFile, dropped=True) - - # Add malscore to results - r["results"].append({ - "values": "Malscore: {} ".format(data['malscore']), - "types": "comment", - "categories": "Payload delivery", - "comment": "Cuckoo analysis: MalScore" - }) - - # Add virustotal data, if exists - if(data.get('virustotal')): - processVT(r, data['virustotal']) - - # Add network information, should be improved - processNetwork(r, data['network']) - - # Add behavioral information - processSummary(r, data['behavior']['summary']) - - # Return - return r - - -def processSummary(r, summary): - r["results"].append({ - "values": summary['mutexes'], - "types": "mutex", - "categories": "Artifacts dropped", - "comment": "Cuckoo analysis: Observed mutexes" - }) - - -def processVT(r, virustotal): - category = "Antivirus detection" - comment = "VirusTotal analysis" - - if(virustotal.get('permalink')): - r["results"].append({ - "values": virustotal['permalink'], - "types": "link", - "categories": category, - "comments": comment + " - Permalink" - }) - - if(virustotal.get('total')): - r["results"].append({ - "values": "VirusTotal detection rate {}/{}".format( - virustotal['positives'], - virustotal['total'] - ), - "types": "comment", - "categories": category, - "comment": comment - }) - else: - r["results"].append({ - "values": "Sample not detected on VirusTotal", - "types": "comment", - "categories": category, - "comment": comment - }) - - -def processNetwork(r, network): - category = "Network activity" - - for host in network['hosts']: - r["results"].append({ - "values": host['ip'], - "types": "ip-dst", - "categories": category, - "comment": "Cuckoo analysis: Observed 
network traffic" - }) - - -def processBinary(r, target, initial=False, dropped=False): - if(initial): - comment = "Cuckoo analysis: Initial file" - category = "Payload delivery" - elif(dropped): - category = "Artifacts dropped" - comment = "Cuckoo analysis: Dropped file" - - r["results"].append({ - "values": target['name'], - "types": "filename", - "categories": category, - "comment": comment - }) - - r["results"].append({ - "values": target['md5'], - "types": "md5", - "categories": category, - "comment": comment - }) - - r["results"].append({ - "values": target['sha1'], - "types": "sha1", - "categories": category, - "comment": comment - }) - - r["results"].append({ - "values": target['sha256'], - "types": "sha256", - "categories": category, - "comment": comment - }) - - r["results"].append({ - "values": target['sha512'], - "types": "sha512", - "categories": category, - "comment": comment - }) - - # todo : add file size? - - if(target.get('guest_paths')): - r["results"].append({ - "values": target['guest_paths'], - "types": "filename", - "categories": "Payload installation", - "comment": comment + " - Path" - }) + event = json.loads(event.to_json()) + results = { + key: event[key] + for key in ('Attribute', 'Object') + if (key in event and event[key]) + } + return {'results': results} def introspection(): - modulesetup = {} - try: - userConfig - modulesetup['userConfig'] = userConfig - except NameError: - pass - try: - inputSource - modulesetup['inputSource'] = inputSource - except NameError: - pass - return modulesetup + userConfig = { + key: o["userConfig"] + for key, o in CuckooParser.options.items() + } + mispattributes['userConfig'] = userConfig + return mispattributes def version(): moduleinfo['config'] = moduleconfig return moduleinfo - - -if __name__ == '__main__': - x = open('test.json', 'r') - q = [] - q['data'] = x.read() - q = base64.base64encode(q) - - handler(q) diff --git a/tests/test_expansions.py b/tests/test_expansions.py index 493cb4d..364f63b 
100644 --- a/tests/test_expansions.py +++ b/tests/test_expansions.py @@ -43,7 +43,7 @@ class TestExpansions(unittest.TestCase): query = {"module": "hibp", "email-src": "info@circl.lu"} response = self.misp_modules_post(query) to_check = self.get_values(response) - if to_check == "haveibeenpwned.com API not accessible (HTTP 403)": + if to_check == "haveibeenpwned.com API not accessible (HTTP 401)": self.skipTest(f"haveibeenpwned blocks travis IPs: {response}") self.assertEqual(to_check, 'OK (Not Found)', response)