Merge github.com:MISP/misp-modules into 8ear-add-docker-capabilitites

2019-09-02 12:30:18 +02:00 · 2019-09-02 12:30:18 +02:00 · d55331fc1c
parent a9a4ec3851 30d9567e8c
commit d55331fc1c
12 changed files with 1321 additions and 207 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -14,8 +14,6 @@ install:
    - sudo apt-get install libzbar0 libzbar-dev libpoppler-cpp-dev
    - pip install pipenv
    - pipenv install --dev
-    # MKDOCS
-    - pip install -r docs/REQUIREMENTS.txt

 script:
    - pipenv run coverage run -m --parallel-mode --source=misp_modules misp_modules.__init__  -l 127.0.0.1 &
@ -35,14 +33,3 @@ script:
 after_success:
    - pipenv run coverage combine .coverage*
    - pipenv run codecov
-    # MKDOCS
-    - make ci_generate_docs
-
-deploy:
-  provider: pages
-  local-dir: site
-  skip-cleanup: true
-  github-token: $GITHUB_TOKEN  # Set in the settings page of your repository, as a secure variable
-  keep-history: true
-  on:
-    branch: master
--- a/README.md
+++ b/README.md
@ -3,24 +3,552 @@
 [![Build Status](https://travis-ci.org/MISP/misp-modules.svg?branch=master)](https://travis-ci.org/MISP/misp-modules)
 [![Coverage Status](https://coveralls.io/repos/github/MISP/misp-modules/badge.svg?branch=master)](https://coveralls.io/github/MISP/misp-modules?branch=master)
 [![codecov](https://codecov.io/gh/MISP/misp-modules/branch/master/graph/badge.svg)](https://codecov.io/gh/MISP/misp-modules)
-[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2F8ear%2Fmisp-modules.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2F8ear%2Fmisp-modules?ref=badge_shield)
-
-## About

 MISP modules are autonomous modules that can be used for expansion and other services in [MISP](https://github.com/MISP/MISP).

 The modules are written in Python 3 following a simple API interface. The objective is to ease the extensions of MISP functionalities
 without modifying core components. The API is available via a simple REST API which is independent from MISP installation or configuration.

-MISP modules support is included in MISP starting from version `2.4.28`.
+MISP modules support is included in MISP starting from version 2.4.28.

-For more information: [Extending MISP with Python modules](https://www.circl.lu/assets/files/misp-training/switch2016/2-misp-modules.pdf) slides from MISP training.
+For more information: [Extending MISP with Python modules](https://www.misp-project.org/misp-training/3.1-misp-modules.pdf) slides from MISP training.

+## Existing MISP modules
+
+### Expansion modules
+
+* [Backscatter.io](misp_modules/modules/expansion/backscatter_io.py) - a hover and expansion module to expand an IP address with mass-scanning observations.
+* [BGP Ranking](misp_modules/modules/expansion/bgpranking.py) - a hover and expansion module to expand an AS number with the ASN description, its history, and position in BGP Ranking.
+* [BTC scam check](misp_modules/modules/expansion/btc_scam_check.py) - An expansion hover module to instantly check if a BTC address has been abused.
+* [BTC transactions](misp_modules/modules/expansion/btc_steroids.py) - An expansion hover module to get a blockchain balance and the transactions from a BTC address in MISP.
+* [CIRCL Passive DNS](misp_modules/modules/expansion/circl_passivedns.py) - a hover and expansion module to expand hostname and IP addresses with passive DNS information.
+* [CIRCL Passive SSL](misp_modules/modules/expansion/circl_passivessl.py) - a hover and expansion module to expand IP addresses with the X.509 certificate seen.
+* [countrycode](misp_modules/modules/expansion/countrycode.py) - a hover module to tell you what country a URL belongs to.
+* [CrowdStrike Falcon](misp_modules/modules/expansion/crowdstrike_falcon.py) - an expansion module to expand using CrowdStrike Falcon Intel Indicator API.
+* [CVE](misp_modules/modules/expansion/cve.py) - a hover module to give more information about a vulnerability (CVE).
+* [CVE advanced](misp_modules/modules/expansion/cve_advanced.py) - An expansion module to query the CIRCL CVE search API for more information about a vulnerability (CVE).
+* [Cuckoo submit](misp_modules/modules/expansion/cuckoo_submit.py) - A hover module to submit malware sample, url, attachment, domain to Cuckoo Sandbox.
+* [DBL Spamhaus](misp_modules/modules/expansion/dbl_spamhaus.py) - a hover module to check Spamhaus DBL for a domain name.
+* [DNS](misp_modules/modules/expansion/dns.py) - a simple module to resolve MISP attributes like hostname and domain to expand IP addresses attributes.
+* [docx-enrich](misp_modules/modules/expansion/docx-enrich.py) - an enrichment module to get text out of Word document into MISP (using free-text parser).
+* [DomainTools](misp_modules/modules/expansion/domaintools.py) - a hover and expansion module to get information from [DomainTools](http://www.domaintools.com/) whois.
+* [EUPI](misp_modules/modules/expansion/eupi.py) - a hover and expansion module to get information about an URL from the [Phishing Initiative project](https://phishing-initiative.eu/?lang=en).
+* [Farsight DNSDB Passive DNS](misp_modules/modules/expansion/farsight_passivedns.py) - a hover and expansion module to expand hostname and IP addresses with passive DNS information.
+* [GeoIP](misp_modules/modules/expansion/geoip_country.py) - a hover and expansion module to get GeoIP information from geolite/maxmind.
+* [Greynoise](misp_modules/modules/expansion/greynoise.py) - a hover to get information from greynoise.
+* [hashdd](misp_modules/modules/expansion/hashdd.py) - a hover module to check file hashes against [hashdd.com](http://www.hashdd.com) including NSRL dataset.
+* [hibp](misp_modules/modules/expansion/hibp.py) - a hover module to lookup against Have I Been Pwned?
+* [intel471](misp_modules/modules/expansion/intel471.py) - an expansion module to get info from [Intel471](https://intel471.com).
+* [IPASN](misp_modules/modules/expansion/ipasn.py) - a hover and expansion to get the BGP ASN of an IP address.
+* [iprep](misp_modules/modules/expansion/iprep.py) - an expansion module to get IP reputation from packetmail.net.
+* [Joe Sandbox submit](misp_modules/modules/expansion/joesandbox_submit.py) - Submit files and URLs to Joe Sandbox.
+* [Joe Sandbox query](misp_modules/modules/expansion/joesandbox_query.py) - Query Joe Sandbox with the link of an analysis and get the parsed data.
+* [macaddress.io](misp_modules/modules/expansion/macaddress_io.py) - a hover module to retrieve vendor details and other information regarding a given MAC address or an OUI from [MAC address Vendor Lookup](https://macaddress.io). See [integration tutorial here](https://macaddress.io/integrations/MISP-module).
+* [macvendors](misp_modules/modules/expansion/macvendors.py) - a hover module to retrieve mac vendor information.
+* [ocr-enrich](misp_modules/modules/expansion/ocr-enrich.py) - an enrichment module to get OCRized data from images into MISP.
+* [ods-enrich](misp_modules/modules/expansion/ods-enrich.py) - an enrichment module to get text out of OpenOffice spreadsheet document into MISP (using free-text parser).
+* [odt-enrich](misp_modules/modules/expansion/odt-enrich.py) - an enrichment module to get text out of OpenOffice document into MISP (using free-text parser).
+* [onyphe](misp_modules/modules/expansion/onyphe.py) - a module to process queries on Onyphe.
+* [onyphe_full](misp_modules/modules/expansion/onyphe_full.py) - a module to process full queries on Onyphe.
+* [OTX](misp_modules/modules/expansion/otx.py) - an expansion module for [OTX](https://otx.alienvault.com/).
+* [passivetotal](misp_modules/modules/expansion/passivetotal.py) - a [passivetotal](https://www.passivetotal.org/) module that queries a number of different PassiveTotal datasets.
+* [pdf-enrich](misp_modules/modules/expansion/pdf-enrich.py) - an enrichment module to extract text from PDF into MISP (using free-text parser).
+* [pptx-enrich](misp_modules/modules/expansion/pptx-enrich.py) - an enrichment module to get text out of PowerPoint document into MISP (using free-text parser).
+* [qrcode](misp_modules/modules/expansion/qrcode.py) - a module to decode QR codes, barcodes and similar codes from an image and enrich with the decoded values.
+* [rbl](misp_modules/modules/expansion/rbl.py) - a module to get RBL (Real-Time Blackhole List) values from an attribute.
+* [reversedns](misp_modules/modules/expansion/reversedns.py) - Simple Reverse DNS expansion service to resolve reverse DNS from MISP attributes.
+* [securitytrails](misp_modules/modules/expansion/securitytrails.py) - an expansion module for [securitytrails](https://securitytrails.com/).
+* [shodan](misp_modules/modules/expansion/shodan.py) - a minimal [shodan](https://www.shodan.io/) expansion module.
+* [Sigma queries](misp_modules/modules/expansion/sigma_queries.py) - Experimental expansion module querying a sigma rule to convert it into all the available SIEM signatures.
+* [Sigma syntax validator](misp_modules/modules/expansion/sigma_syntax_validator.py) - Sigma syntax validator.
+* [sourcecache](misp_modules/modules/expansion/sourcecache.py) - a module to cache a specific link from a MISP instance.
+* [STIX2 pattern syntax validator](misp_modules/modules/expansion/stix2_pattern_syntax_validator.py) - a module to check a STIX2 pattern syntax.
+* [ThreatCrowd](misp_modules/modules/expansion/threatcrowd.py) - an expansion module for [ThreatCrowd](https://www.threatcrowd.org/).
+* [threatminer](misp_modules/modules/expansion/threatminer.py) - an expansion module to expand from [ThreatMiner](https://www.threatminer.org/).
+* [urlhaus](misp_modules/modules/expansion/urlhaus.py) - Query urlhaus to get additional data about a domain, hash, hostname, ip or url.
+* [urlscan](misp_modules/modules/expansion/urlscan.py) - an expansion module to query [urlscan.io](https://urlscan.io).
+* [virustotal](misp_modules/modules/expansion/virustotal.py) - an expansion module to query the [VirusTotal](https://www.virustotal.com/gui/home) API with a high request rate limit required. (More details about the API: [here](https://developers.virustotal.com/reference))
+* [virustotal_public](misp_modules/modules/expansion/virustotal_public.py) - an expansion module to query the [VirusTotal](https://www.virustotal.com/gui/home) API with a public key and a low request rate limit. (More details about the API: [here](https://developers.virustotal.com/reference))
+* [VMray](misp_modules/modules/expansion/vmray_submit.py) - a module to submit a sample to VMray.
+* [VulnDB](misp_modules/modules/expansion/vulndb.py) - a module to query [VulnDB](https://www.riskbasedsecurity.com/).
+* [Vulners](misp_modules/modules/expansion/vulners.py) - an expansion module to expand information about CVEs using Vulners API.
+* [whois](misp_modules/modules/expansion/whois.py) - a module to query a local instance of [uwhois](https://github.com/rafiot/uwhoisd).
+* [wikidata](misp_modules/modules/expansion/wiki.py) - a [wikidata](https://www.wikidata.org) expansion module.
+* [xforce](misp_modules/modules/expansion/xforceexchange.py) - an IBM X-Force Exchange expansion module.
+* [xlsx-enrich](misp_modules/modules/expansion/xlsx-enrich.py) - an enrichment module to get text out of an Excel document into MISP (using free-text parser).
+* [YARA query](misp_modules/modules/expansion/yara_query.py) - a module to create YARA rules from single hash attributes.
+* [YARA syntax validator](misp_modules/modules/expansion/yara_syntax_validator.py) - YARA syntax validator.
+
+### Export modules
+
+* [CEF](misp_modules/modules/export_mod/cef_export.py) module to export Common Event Format (CEF).
+* [Cisco FireSight Manager ACL rule](misp_modules/modules/export_mod/cisco_firesight_manager_ACL_rule_export.py) module to export as rule for the Cisco FireSight manager ACL.
+* [GoAML export](misp_modules/modules/export_mod/goamlexport.py) module to export in [GoAML format](http://goaml.unodc.org/goaml/en/index.html).
+* [Lite Export](misp_modules/modules/export_mod/liteexport.py) module to export a lite event.
+* [PDF export](misp_modules/modules/export_mod/pdfexport.py) module to export an event in PDF.
+* [Nexthink query format](misp_modules/modules/export_mod/nexthinkexport.py) module to export in Nexthink query format.
+* [osquery](misp_modules/modules/export_mod/osqueryexport.py) module to export in [osquery](https://osquery.io/) query format.
+* [ThreatConnect](misp_modules/modules/export_mod/threat_connect_export.py) module to export in ThreatConnect CSV format.
+* [ThreatStream](misp_modules/modules/export_mod/threatStream_misp_export.py) module to export in ThreatStream format.
+
+### Import modules
+
+* [CSV import](misp_modules/modules/import_mod/csvimport.py) Customizable CSV import module.
+* [Cuckoo JSON](misp_modules/modules/import_mod/cuckooimport.py) Cuckoo JSON import.
+* [Email Import](misp_modules/modules/import_mod/email_import.py) Email import module for MISP to import basic metadata.
+* [GoAML import](misp_modules/modules/import_mod/goamlimport.py) Module to import [GoAML](http://goaml.unodc.org/goaml/en/index.html) XML format.
+* [Joe Sandbox import](misp_modules/modules/import_mod/joe_import.py) Parse data from a Joe Sandbox json report.
+* [OCR](misp_modules/modules/import_mod/ocr.py) Optical Character Recognition (OCR) module for MISP to import attributes from images, scan or faxes.
+* [OpenIOC](misp_modules/modules/import_mod/openiocimport.py) OpenIOC import based on PyMISP library.
+* [ThreatAnalyzer](misp_modules/modules/import_mod/threatanalyzer_import.py) - An import module to process ThreatAnalyzer archive.zip/analysis.json sandbox exports.
+* [VMRay](misp_modules/modules/import_mod/vmray_import.py) - An import module to process VMRay export.
+
+## How to install and start MISP modules in a Python virtualenv? (recommended)
+
+~~~~bash
+sudo apt-get install python3-dev python3-pip libpq5 libjpeg-dev tesseract-ocr libpoppler-cpp-dev imagemagick virtualenv libopencv-dev zbar-tools libzbar0 libzbar-dev libfuzzy-dev -y
+sudo -u www-data virtualenv -p python3 /var/www/MISP/venv
+cd /usr/local/src/
+sudo git clone https://github.com/MISP/misp-modules.git
+cd misp-modules
+sudo -u www-data /var/www/MISP/venv/bin/pip install -I -r REQUIREMENTS
+sudo -u www-data /var/www/MISP/venv/bin/pip install .
+# Start misp-modules as a service
+sudo cp etc/systemd/system/misp-modules.service /etc/systemd/system/
+sudo systemctl daemon-reload
+sudo systemctl enable --now misp-modules
+/var/www/MISP/venv/bin/misp-modules -l 127.0.0.1 -s & #to start the modules
+~~~~
+
+## How to install and start MISP modules on RHEL-based distributions ?
+As of this writing, the official RHEL repositories only contain Ruby 2.0.0 and Ruby 2.1 or higher is required. As such, this guide installs Ruby 2.2 from the [SCL](https://access.redhat.com/documentation/en-us/red_hat_software_collections/3/html/3.2_release_notes/chap-installation#sect-Installation-Subscribe) repository. 
+
+~~~~bash
+sudo yum install rh-ruby22
+sudo yum install openjpeg-devel
+sudo yum install rubygem-rouge rubygem-asciidoctor zbar-devel opencv-devel gcc-c++ pkgconfig poppler-cpp-devel python-devel redhat-rpm-config
+cd /var/www/MISP
+git clone https://github.com/MISP/misp-modules.git
+cd misp-modules
+sudo -u apache /usr/bin/scl enable rh-python36 "virtualenv -p python3 /var/www/MISP/venv"
+sudo -u apache /var/www/MISP/venv/bin/pip install -U -I -r REQUIREMENTS
+sudo -u apache /var/www/MISP/venv/bin/pip install -U .
+~~~~
+
+Create the service file /etc/systemd/system/misp-modules.service :
+~~~~
+echo "[Unit]
+Description=MISP's modules
+After=misp-workers.service
+
+[Service]
+Type=simple
+User=apache
+Group=apache
+ExecStart=/usr/bin/scl enable rh-python36 rh-ruby22  '/var/www/MISP/venv/bin/misp-modules -l 127.0.0.1 -s'
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target" | sudo tee /etc/systemd/system/misp-modules.service
+~~~~
+
+The `After=misp-workers.service` must be changed or removed if you have not created a misp-workers service.
+Then, enable the misp-modules service and start it:
+~~~~bash
+systemctl daemon-reload
+systemctl enable --now misp-modules
+~~~~
+
+## How to add your own MISP modules?
+
+Create your module in [misp_modules/modules/expansion/](misp_modules/modules/expansion/), [misp_modules/modules/export_mod/](misp_modules/modules/export_mod/), or [misp_modules/modules/import_mod/](misp_modules/modules/import_mod/). The module should have at minimum three functions:
+
+* **introspection** function that returns a dict of the supported attributes (input and output) by your expansion module.
+* **handler** function which accepts a JSON document to expand the values and return a dictionary of the expanded values.
+* **version** function that returns a dict with the version and the associated meta-data including potential configurations required of the module.
+
+Don't forget to return an error key and value if an error is raised to propagate it to the MISP user-interface.
+
+Your module's script name should also be added in the `__all__` list of `<module type folder>/__init__.py` in order for it to be loaded.
+
+~~~python
+...
+    # Checking for required value
+    if not request.get('ip-src'):
+        # Return an error message
+        return {'error': "A source IP is required"}
+...
+~~~
+
+
+### introspection
+
+The function that returns a dict of the supported attributes (input and output) by your expansion module.
+
+~~~python
+mispattributes = {'input': ['link', 'url'],
+                  'output': ['attachment', 'malware-sample']}
+
+def introspection():
+    return mispattributes
+~~~
+
+### version
+
+The function that returns a dict with the version and the associated meta-data including potential configurations required of the module.
+
+
+### Additional Configuration Values
+
+If your module requires additional configuration (to be exposed via the MISP user-interface), you can define those in the moduleconfig value returned by the version function.
+
+~~~python
+# config fields that your code expects from the site admin
+moduleconfig = ["apikey", "event_limit"]
+
+def version():
+    moduleinfo['config'] = moduleconfig
+    return moduleinfo
+~~~
+
+
+When you do this a config array is added to the meta-data output containing all the potential configuration values:
+
+~~~
+"meta": {
+      "description": "PassiveTotal expansion service to expand values with multiple Passive DNS sources",
+      "config": [
+        "username",
+        "password"
+      ],
+      "module-type": [
+        "expansion",
+        "hover"
+      ],
+
+...
+~~~
+
+
+If you want to use the configuration values set in the web interface they are stored in the key `config` in the JSON object passed to the handler.
+
+~~~
+def handler(q=False):
+
+    # Check if we were given a configuration
+    config = q.get("config", {})
+
+    # Find out if there is a username field
+    username = config.get("username", None)
+~~~
+
+
+### handler
+
+The function which accepts a JSON document to expand the values and return a dictionary of the expanded values.
+
+~~~python
+def handler(q=False):
+    "Fully functional rot-13 encoder"
+    if q is False:
+        return False
+    request = json.loads(q)
+    src = request.get('ip-src')
+    if src is None:
+        # Return an error message
+        return {'error': "A source IP is required"}
+    else:
+        return {'results':
+                codecs.encode(src, "rot-13")}
+~~~
+
+#### export module
+
+For an export module, the `request["data"]` object corresponds to a list of events (dictionaries) to handle.
+
+Iterating over events attributes is performed using their `Attribute` key.
+
+~~~python
+...
+for event in request["data"]:
+        for attribute in event["Attribute"]:
+          # do stuff w/ attribute['type'], attribute['value'], ...
+...
+~~~
+
+### Returning Binary Data
+
+If you want to return a file or other data you need to add a data attribute.
+
+~~~python
+{"results": {"values": "filename.txt",
+             "types": "attachment",
+             "data"  : base64.b64encode(<ByteIO>)  # base64 encode your data first
+             "comment": "This is an attachment"}}
+~~~
+
+If the binary file is malware you can use 'malware-sample' as the type. If you do this the malware sample will be automatically zipped and password protected ('infected') after being uploaded.
+
+
+~~~python
+{"results": {"values": "filename.txt",
+             "types": "malware-sample",
+             "data"  : base64.b64encode(<ByteIO>)  # base64 encode your data first
+             "comment": "This is an attachment"}}
+~~~
+
+[To learn more about how data attributes are processed you can read the processing code here.](https://github.com/MISP/PyMISP/blob/4f230c9299ad9d2d1c851148c629b61a94f3f117/pymisp/mispevent.py#L185-L200)
+
+
+### Module type
+
+A MISP module can be of four types:
+
+- **expansion** - service related to an attribute that can be used to extend and update an existing event.
+- **hover** - service related to an attribute to provide additional information to the users without updating the event.
+- **import** - service related to importing and parsing an external object that can be used to extend an existing event.
+- **export** - service related to exporting an object, event, or data.
+
+module-type is an array where the list of supported types can be added.
+
+## Testing your modules?
+
+MISP uses the **modules** function to discover the available MISP modules and their supported MISP attributes:
+
+~~~
+% curl -s http://127.0.0.1:6666/modules | jq .
+[
+  {
+    "name": "passivetotal",
+    "type": "expansion",
+    "mispattributes": {
+      "input": [
+        "hostname",
+        "domain",
+        "ip-src",
+        "ip-dst"
+      ],
+      "output": [
+        "ip-src",
+        "ip-dst",
+        "hostname",
+        "domain"
+      ]
+    },
+    "meta": {
+      "description": "PassiveTotal expansion service to expand values with multiple Passive DNS sources",
+      "config": [
+        "username",
+        "password"
+      ],
+      "author": "Alexandre Dulaunoy",
+      "version": "0.1"
+    }
+  },
+  {
+    "name": "sourcecache",
+    "type": "expansion",
+    "mispattributes": {
+      "input": [
+        "link"
+      ],
+      "output": [
+        "link"
+      ]
+    },
+    "meta": {
+      "description": "Module to cache web pages of analysis reports, OSINT sources. The module returns a link of the cached page.",
+      "author": "Alexandre Dulaunoy",
+      "version": "0.1"
+    }
+  },
+  {
+    "name": "dns",
+    "type": "expansion",
+    "mispattributes": {
+      "input": [
+        "hostname",
+        "domain"
+      ],
+      "output": [
+        "ip-src",
+        "ip-dst"
+      ]
+    },
+    "meta": {
+      "description": "Simple DNS expansion service to resolve IP address from MISP attributes",
+      "author": "Alexandre Dulaunoy",
+      "version": "0.1"
+    }
+  }
+]
+
+~~~
+
+The MISP module service returns the available modules in a JSON array containing each module name along with their supported input attributes.
+
+Based on this information, a query can be built in a JSON format and saved as body.json:
+
+~~~json
+{
+  "hostname": "www.foo.be",
+  "module": "dns"
+}
+~~~
+
+Then you can POST this JSON format query towards the MISP object server:
+
+~~~bash
+curl -s http://127.0.0.1:6666/query -H "Content-Type: application/json" --data @body.json -X POST
+~~~
+
+The module should output the following JSON:
+
+~~~json
+{
+  "results": [
+    {
+      "types": [
+        "ip-src",
+        "ip-dst"
+      ],
+      "values": [
+        "188.65.217.78"
+      ]
+    }
+  ]
+}
+~~~
+
+It is also possible to restrict the category options of the resolved attributes by passing a list of categories along (optional):
+
+~~~json
+{
+  "results": [
+    {
+      "types": [
+        "ip-src",
+        "ip-dst"
+      ],
+      "values": [
+        "188.65.217.78"
+      ],
+      "categories": [
+        "Network activity",
+        "Payload delivery"
+      ]
+    }
+  ]
+}
+~~~
+
+For both the type and the category lists, the first item in the list will be the default setting on the interface.
+
+### Enable your module in the web interface
+
+For a module to be activated in the MISP web interface it must be enabled in the "Plugin Settings".
+
+Go to "Administration > Server Settings" in the top menu
+- Go to "Plugin Settings" in the top "tab menu bar"
+- Click on the name of the type of module you have created to expand the list of plugins to show your module.
+- Find the name of your plugin's "enabled" value in the Setting Column.
+"Plugin.[MODULE NAME]_enabled"
+- Double click on its "Value" column
+
+~~~
+Priority        Setting                         Value   Description                             Error Message
+Recommended     Plugin.Import_ocr_enabled       false   Enable or disable the ocr module.       Value not set.
+~~~
+
+- Use the drop-down to set the enabled value to 'true'
+
+~~~
+Priority        Setting                         Value   Description                             Error Message
+Recommended     Plugin.Import_ocr_enabled       true   Enable or disable the ocr module.       Value not set.
+~~~
+
+### Set any other required settings for your module
+
+In this same menu set any other plugin settings that are required for testing.
+
+## Install misp-module on an offline instance.
+First, you need to grab all necessary packages for example like this :
+
+Use pip wheel to create an archive
+~~~
+mkdir misp-modules-offline
+pip3 wheel -r REQUIREMENTS shodan --wheel-dir=./misp-modules-offline
+tar -cjvf misp-module-bundeled.tar.bz2 ./misp-modules-offline/*
+~~~
+On offline machine :
+~~~
+mkdir misp-modules-bundle
+tar xvf misp-module-bundeled.tar.bz2 -C misp-modules-bundle
+cd misp-modules-bundle
+ls -1|while read line; do sudo pip3 install --force-reinstall --ignore-installed --upgrade --no-index --no-deps ${line};done
+~~~
+Next you can follow standard install procedure.
+
+## How to contribute your own module?
+
+Fork the project, add your module, test it and make a pull-request. Modules can also be private as you can add a module in your own MISP installation.
+
+
+## Tips for developers creating modules
+
+Download a pre-built virtual image from the [MISP training materials](https://www.circl.lu/services/misp-training-materials/).
+
+- Create a Host-Only adapter in VirtualBox
+- Set your Misp OVA to that Host-Only adapter
+- Start the virtual machine
+- Get the IP address of the virtual machine
+- SSH into the machine (Login info on training page)
+- Go into the misp-modules directory
+
+~~~bash
+cd /usr/local/src/misp-modules
+~~~
+
+Set the git repo to your fork and checkout your development branch. If you SSH'ed in as the misp user you will have to use sudo.
+
+~~~bash
+sudo git remote set-url origin https://github.com/YourRepo/misp-modules.git
+sudo git pull
+sudo git checkout MyModBranch
+~~~
+
+Remove the contents of the build directory and re-install misp-modules.
+
+~~~bash
+sudo rm -fr build/*
+sudo -u www-data /var/www/MISP/venv/bin/pip install --upgrade .
+~~~
+
+SSH in with a different terminal and run `misp-modules` with debugging enabled.
+
+~~~bash
+# In case misp-modules is not a service do:
+# sudo killall misp-modules
+sudo systemctl disable --now misp-modules
+sudo -u www-data /var/www/MISP/venv/bin/misp-modules -d
+~~~
+
+
+In your original terminal you can now run your tests manually and see any errors that arise.
+
+~~~bash
+cd tests/
+curl -s http://127.0.0.1:6666/query -H "Content-Type: application/json" --data @MY_TEST_FILE.json -X POST
+cd ../
+~~~

 ## Documentation

-The new documentation can found [here](https://misp.github.io/misp-modules).
+In order to provide documentation about some modules that require specific input / output / configuration, the [doc](doc) directory contains detailed information about the general purpose, requirements, features, input and output of each of these modules:

-
-## License
-[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2F8ear%2Fmisp-modules.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2F8ear%2Fmisp-modules?ref=badge_large)
+- **description** - quick description of the general purpose of the module, as the one given by the moduleinfo
+- **requirements** - special libraries needed to make the module work
+- **features** - description of the way to use the module, with the required MISP features to make the module give the intended result
+- **references** - link(s) giving additional information about the format concerned in the module
+- **input** - description of the format of data used in input
+- **output** - description of the format given as the result of the module execution
--- a/2
+++ b/2
@ -3,7 +3,7 @@
 -e git+https://github.com/D4-project/BGP-Ranking.git/@429cea9c0787876820984a2df4e982449a84c10e#egg=pybgpranking&subdirectory=client
 -e git+https://github.com/D4-project/IPASN-History.git/@47cd0f2658ab172fce42126ff3a1dbcddfb0b5fb#egg=pyipasnhistory&subdirectory=client
 -e git+https://github.com/MISP/PyIntel471.git@0df8d51f1c1425de66714b3a5a45edb69b8cc2fc#egg=pyintel471
-e git+https://github.com/MISP/PyMISP.git@583fb6592495ea358aad47a8a1ec92d43c13348a#egg=pymisp
+-e git+https://github.com/MISP/PyMISP.git@3ad351380055f0a655ed529b9c79b242a9227b84#egg=pymisp
 -e git+https://github.com/Rafiot/uwhoisd.git@411572840eba4c72dc321c549b36a54ed5cea9de#egg=uwhois&subdirectory=client
 -e git+https://github.com/cartertemm/ODTReader.git/@49d6938693f6faa3ff09998f86dba551ae3a996b#egg=odtreader
 -e git+https://github.com/sebdraven/pydnstrails@48c1f740025c51289f43a24863d1845ff12fd21a#egg=pydnstrails
--- a/docs/contribute.md
+++ b/docs/contribute.md
@ -322,8 +322,9 @@ In order to provide documentation about some modules that require specific input
 - **input** - description of the format of data used in input
 - **output** - description of the format given as the result of the module execution

-In addition to the modul documentation please add your module to [docs/index.md](https://github.com/MISP/misp-modules/tree/master/docs/index.md).
+In addition to the module documentation please add your module to [docs/index.md](https://github.com/MISP/misp-modules/tree/master/docs/index.md).

+There are also [complementary slides](https://www.misp-project.org/misp-training/3.1-misp-modules.pdf) for the creation of MISP modules.


 ## Tips for developers creating modules
--- a/misp_modules/lib/joe_parser.py
+++ b/misp_modules/lib/joe_parser.py
@ -405,7 +405,7 @@ class JoeParser():
    def finalize_results(self):
        if self.references:
            self.build_references()
-        event = json.loads(self.misp_event.to_json())['Event']
+        event = json.loads(self.misp_event.to_json())
        self.results = {key: event[key] for key in ('Attribute', 'Object', 'Tag') if (key in event and event[key])}

    @staticmethod
--- a/misp_modules/modules/expansion/cve_advanced.py
+++ b/misp_modules/modules/expansion/cve_advanced.py
@ -1,3 +1,4 @@
+from collections import defaultdict
 from pymisp import MISPEvent, MISPObject
 import json
 import requests
@ -12,17 +13,25 @@ cveapi_url = 'https://cve.circl.lu/api/cve/'


 class VulnerabilityParser():
-    def __init__(self, vulnerability):
+    def __init__(self, attribute, vulnerability):
+        self.attribute = attribute
        self.vulnerability = vulnerability
        self.misp_event = MISPEvent()
+        self.misp_event.add_attribute(**attribute)
+        self.references = defaultdict(list)
+        self.capec_features = ('id', 'name', 'summary', 'prerequisites', 'solutions')
        self.vulnerability_mapping = {
            'id': ('text', 'id'), 'summary': ('text', 'summary'),
            'vulnerable_configuration_cpe_2_2': ('text', 'vulnerable_configuration'),
            'Modified': ('datetime', 'modified'), 'Published': ('datetime', 'published'),
            'references': ('link', 'references'), 'cvss': ('float', 'cvss-score')}
+        self.weakness_mapping = {'name': 'name', 'description_summary': 'description',
+                                 'status': 'status', 'weaknessabs': 'weakness-abs'}

    def get_result(self):
-        event = json.loads(self.misp_event.to_json())['Event']
+        if self.references:
+            self.__build_references()
+        event = json.loads(self.misp_event.to_json())
        results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])}
        return {'results': results}

@ -41,7 +50,50 @@ class VulnerabilityParser():
                attribute_type, relation = self.vulnerability_mapping[feature]
                for value in self.vulnerability[feature]:
                    vulnerability_object.add_attribute(relation, **{'type': attribute_type, 'value': value})
+        vulnerability_object.add_reference(self.attribute['uuid'], 'related-to')
        self.misp_event.add_object(**vulnerability_object)
+        if 'cwe' in self.vulnerability and self.vulnerability['cwe'] != 'Unknown':
+            self.__parse_weakness(vulnerability_object.uuid)
+        if 'capec' in self.vulnerability:
+            self.__parse_capec(vulnerability_object.uuid)
+
+    def __build_references(self):
+        """Attach the pending references to the event objects they start from.
+
+        self.references maps a source object uuid to a list of keyword
+        dicts; each dict is passed to add_reference() on the first event
+        object carrying that uuid. Uuids matching no object are ignored.
+        """
+        for source_uuid, pending in self.references.items():
+            source = next(
+                (candidate for candidate in self.misp_event.objects
+                 if candidate.uuid == source_uuid),
+                None,
+            )
+            if source is None:
+                continue
+            for reference_kwargs in pending:
+                source.add_reference(**reference_kwargs)
+
+    def __parse_capec(self, vulnerability_uuid):
+        """Add one 'attack-pattern' object per CAPEC entry of the vulnerability.
+
+        Each object gets a text attribute for every feature listed in
+        self.capec_features and one 'related-weakness' attribute per related
+        CWE id. A 'targeted-by' reference from the vulnerability object
+        (vulnerability_uuid) to the new object is queued in self.references.
+        """
+        for capec in self.vulnerability['capec']:
+            attack_pattern = MISPObject('attack-pattern')
+            for feature in self.capec_features:
+                attack_pattern.add_attribute(feature, type='text', value=capec[feature])
+            for weakness_id in capec['related_weakness']:
+                attack_pattern.add_attribute('related-weakness', type='weakness',
+                                             value="CWE-{}".format(weakness_id))
+            self.misp_event.add_object(**attack_pattern)
+            reference = dict(referenced_uuid=attack_pattern.uuid,
+                             relationship_type='targeted-by')
+            self.references[vulnerability_uuid].append(reference)
+
+    def __parse_weakness(self, vulnerability_uuid):
+        """Add the 'weakness' object matching the vulnerability's CWE, if any.
+
+        The CWE list is fetched from the API (cveapi_url with '/cve/'
+        replaced by '/cwe'). When an entry has the same id as the numeric
+        part of the vulnerability's 'cwe' field, a 'weakness' object is
+        added to the event and a 'weakened-by' reference from the
+        vulnerability object (vulnerability_uuid) is queued in
+        self.references. Nothing is added when the request does not return
+        HTTP 200 or no entry matches.
+        """
+        attribute_type = 'text'
+        # rpartition never raises: the former two-name unpacking of
+        # split('-') failed with ValueError on identifiers holding no dash
+        # or several dashes (e.g. 'NVD-CWE-Other').
+        cwe_string, separator, cwe_id = self.vulnerability['cwe'].rpartition('-')
+        if not separator:
+            return
+        cwes = requests.get(cveapi_url.replace('/cve/', '/cwe'))
+        if cwes.status_code != 200:
+            return
+        for cwe in cwes.json():
+            if cwe['id'] != cwe_id:
+                continue
+            weakness_object = MISPObject('weakness')
+            weakness_object.add_attribute('id', type=attribute_type,
+                                          value='-'.join([cwe_string, cwe_id]))
+            for feature, relation in self.weakness_mapping.items():
+                # skip features that are absent or empty in the CWE entry
+                if cwe.get(feature):
+                    weakness_object.add_attribute(relation, type=attribute_type,
+                                                  value=cwe[feature])
+            self.misp_event.add_object(**weakness_object)
+            self.references[vulnerability_uuid].append(dict(referenced_uuid=weakness_object.uuid,
+                                                            relationship_type='weakened-by'))
+            # only the first matching entry is used
+            break


 def handler(q=False):
@ -61,7 +113,7 @@ def handler(q=False):
    else:
        misperrors['error'] = 'cve.circl.lu API not accessible'
        return misperrors['error']
-    parser = VulnerabilityParser(vulnerability)
+    parser = VulnerabilityParser(attribute, vulnerability)
    parser.parse_vulnerability_information()
    return parser.get_result()

--- a/misp_modules/modules/expansion/urlhaus.py
+++ b/misp_modules/modules/expansion/urlhaus.py
@ -31,7 +31,7 @@ class URLhaus():
        return vt_object

    def get_result(self):
-        event = json.loads(self.misp_event.to_json())['Event']
+        event = json.loads(self.misp_event.to_json())
        results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])}
        return {'results': results}

--- a/misp_modules/modules/expansion/virustotal.py
+++ b/misp_modules/modules/expansion/virustotal.py
@ -35,7 +35,7 @@ class VirusTotalParser(object):
        return self.input_types_mapping[self.attribute.type](self.attribute.value, recurse=True)

    def get_result(self):
-        event = json.loads(self.misp_event.to_json())['Event']
+        event = json.loads(self.misp_event.to_json())
        results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])}
        return {'results': results}

--- a/misp_modules/modules/expansion/virustotal_public.py
+++ b/misp_modules/modules/expansion/virustotal_public.py
@ -23,7 +23,7 @@ class VirusTotalParser():
        self.apikey = apikey

    def get_result(self):
-        event = json.loads(self.misp_event.to_json())['Event']
+        event = json.loads(self.misp_event.to_json())
        results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])}
        return {'results': results}

--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@ -194,7 +194,7 @@ class CsvParser():
        return list2pop, misp, list(reversed(head))

    def finalize_results(self):
-        event = json.loads(self.misp_event.to_json())['Event']
+        event = json.loads(self.misp_event.to_json())
        self.results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])}


--- a/misp_modules/modules/import_mod/cuckooimport.py
+++ b/misp_modules/modules/import_mod/cuckooimport.py
@ -1,198 +1,744 @@
 import json
 import base64
+import io
+import logging
+import posixpath
+import stat
+import tarfile
+import zipfile
+from pymisp import MISPEvent, MISPObject, MISPAttribute
+from pymisp.tools import make_binary_objects
+from collections import OrderedDict
+
+log = logging.getLogger(__name__)

 misperrors = {'error': 'Error'}
-userConfig = {}
-inputSource = ['file']

-moduleinfo = {'version': '0.1', 'author': 'Victor van der Stoep',
-              'description': 'Cuckoo JSON import',
-              'module-type': ['import']}
+moduleinfo = {
+    'version': '1.1',
+    'author': 'Pierre-Jean Grenier',
+    'description': "Import a Cuckoo archive (zipfile or bzip2 tarball), "
+                   "either downloaded manually or exported from the "
+                   "API (/tasks/report/{task_id}/all).",
+    'module-type': ['import'],
+}

 moduleconfig = []

+mispattributes = {
+    'inputSource': ['file'],
+    'output': ['MISP objects', 'malware-sample'],
+    'format': 'misp_standard',
+}
+
+# Attributes for which we can set the "Artifacts dropped"
+# category if we want to
+ARTIFACTS_DROPPED = (
+    "filename",
+    "md5",
+    "sha1",
+    "sha256",
+    "sha512",
+    "malware-sample",
+    "mimetype",
+    "ssdeep",
+)
+
+# Same for the category "Payload delivery"
+PAYLOAD_DELIVERY = ARTIFACTS_DROPPED
+
+
+class PrettyDict(OrderedDict):
+    """
+    OrderedDict whose str() is a compact one-line rendering of its items,
+    formatted as "(key) value; (key) value".
+
+    Values are converted with str(); those longer than MAX_SIZE characters
+    are truncated and their key is suffixed with ",cut". Newlines in the
+    rendered values are replaced by spaces.
+    """
+    MAX_SIZE = 30
+
+    def __str__(self):
+        rendered = []
+        for key, value in self.items():
+            value = str(value)
+            if len(value) > self.MAX_SIZE:
+                key = f"{key},cut"
+                value = value[:self.MAX_SIZE]
+            # str.replace returns a new string: the result has to be kept,
+            # otherwise the newlines stay in the output
+            value = value.replace('\n', ' ')
+            rendered.append(f"({key}) {value}")
+        return "; ".join(rendered)
+
+
+def search_objects(event, name, attributes=()):
+    """
+    Search for objects in event, which name is `name` and
+    contain at least the attributes given.
+    Return a lazy iterator over the matching objects.
+    @ param attributes: an iterable of (object_relation, value); values
+      are compared after conversion to str
+    """
+    # Materialise once so a one-shot iterable can be checked against
+    # every object of the event.
+    wanted = [(relation, str(value)) for relation, value in attributes]
+
+    def matches(obj):
+        # The name is tested on its own: it used to be tested only inside
+        # all(), so an empty `attributes` matched every object of the event.
+        if obj.name != name:
+            return False
+        present = {(attr.object_relation, str(attr.value))
+                   for attr in obj.attributes}
+        return all(pair in present for pair in wanted)
+
+    return filter(matches, event.objects)
+
+
+def find_process_by_pid(event, pid):
+    """
+    Return the first 'process' MISPObject of `event` holding a 'pid'
+    attribute equal to `pid`, or None when no such object exists.
+    @ param pid: integer or str
+    """
+    wanted = (('pid', pid),)
+    for process in search_objects(event, "process", wanted):
+        # only the first match is of interest
+        return process
+    return None
+
+
+class CuckooParser():
+    # This dict is used to generate the userConfig and link the different
+    # options to the corresponding method of the parser. This way, we avoid
+    # redundancy and make future changes easier (instead of for instance
+    # defining all the options in userConfig directly, and then making a
+    # switch when running the parser).
+    # Careful about the order here, as we create references between
+    # MISPObjects/MISPAttributes at the same time we generate them.
+    # Hence when we create object B, which we want to reference to
+    # object A, we should already have created object A.
+    # TODO create references only after all parsing is done
+    options = {
+        "Sandbox info": {
+            "method": lambda self: self.add_sandbox_info(),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Add info related to the sandbox",
+                'checked': 'true',
+            },
+        },
+        "Upload sample": {
+            "method": lambda self: self.add_sample(),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Upload the sample",
+                'checked': 'true',
+            },
+        },
+        "Processes": {
+            "method": lambda self: self.add_process_tree(),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Add info related to the processes",
+                'checked': 'true',
+            },
+        },
+        "DNS": {
+            "method": lambda self: self.add_dns(),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Add DNS queries/answers",
+                'checked': 'true',
+            },
+        },
+        "TCP": {
+            "method": lambda self: self.add_network("tcp"),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Add TCP connections",
+                'checked': 'true',
+            },
+        },
+        "UDP": {
+            "method": lambda self: self.add_network("udp"),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Add UDP connections",
+                'checked': 'true',
+            },
+        },
+        "HTTP": {
+            "method": lambda self: self.add_http(),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Add HTTP requests",
+                'checked': 'true',
+            },
+        },
+        "Signatures": {
+            "method": lambda self: self.add_signatures(),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Add Cuckoo's triggered signatures",
+                'checked': 'true',
+            },
+        },
+        "Screenshots": {
+            "method": lambda self: self.add_screenshots(),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Upload the screenshots",
+                'checked': 'true',
+            },
+        },
+        "Dropped files": {
+            "method": lambda self: self.add_dropped_files(),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Upload the dropped files",
+                'checked': 'true',
+            },
+        },
+        "Dropped buffers": {
+            "method": lambda self: self.add_dropped_buffers(),
+            "userConfig": {
+                'type': 'Boolean',
+                'message': "Upload the dropped buffers",
+                'checked': 'true',
+            },
+        },
+    }
+
+    def __init__(self, config):
+        """
+        @ param config: dict mapping option names (keys of `options`) to
+          the value chosen by the user, or None when it was not provided
+        """
+        self.event = MISPEvent()
+        self.files = None
+        self.malware_binary = None
+        self.report = None
+        self.config = {}
+        for key, on in config.items():
+            if on is None:
+                # missing option: fall back to the default specified
+                # in the options
+                on = self.options[key]["userConfig"]["checked"] == 'true'
+            self.config[key] = int(on)
+
+    def get_file(self, relative_filepath):
+        """Return a binary file object for the corresponding relative_filepath
+        in the Cuckoo archive. If there is none, return an empty io.BytesIO
+        to avoid fatal errors."""
+        res = self.files.get(relative_filepath)
+        if res is None:
+            # An in-memory empty file replaces the former open('/dev/null'),
+            # which opened a new OS file handle on every call without ever
+            # closing it, and only exists on POSIX systems.
+            # None is also what tarfile's extractfile() returns for
+            # directory members, which read_archive keeps in self.files.
+            log.debug(f"Did not find file {relative_filepath}, "
+                      f"returned an empty file instead")
+            return io.BytesIO()
+        return res
+
+    def read_archive(self, archive_encoded):
+        """Read the archive exported from Cuckoo and initialize the class"""
+        # archive_encoded is base 64 encoded content
+        # we extract the info about each file but do not retrieve
+        # it automatically, as it may take too much space in memory
+        buf_io = io.BytesIO(base64.b64decode(archive_encoded))
+        if zipfile.is_zipfile(buf_io):
+            # the archive was probably downloaded from the WebUI
+            buf_io.seek(0)  # don't forget this not to read an empty buffer
+            z = zipfile.ZipFile(buf_io, 'r')
+            self.files = {
+                info.filename: z.open(info)
+                for info in z.filelist
+                # only extract the regular files and dirs, we don't
+                # want any symbolic link
+                if stat.S_ISREG(info.external_attr >> 16)
+                or stat.S_ISDIR(info.external_attr >> 16)
+            }
+        else:
+            # the archive was probably downloaded from the API
+            buf_io.seek(0)  # don't forget this not to read an empty buffer
+            f = tarfile.open(fileobj=buf_io, mode='r:bz2')
+            self.files = {
+                info.name: f.extractfile(info)
+                for info in f.getmembers()
+                # only extract the regular files and dirs, we don't
+                # want any symbolic link
+                if info.isreg() or info.isdir()
+            }
+
+        # We want to keep the order of the keys of sub-dicts in the report,
+        # eg. the signatures have marks with unknown keys such as
+        #     {'marks': [
+        #        {"suspicious_features": "Connection to IP address",
+        #         "suspicious_request": "OPTIONS http://85.20.18.18/doc"}
+        #     ]}
+        # To render those marks properly, we can only hope the developers
+        # thought about the order in which they put the keys, and keep this
+        # order so that the signature makes sense to the reader.
+        # We use PrettyDict, a customization of OrderedDict to do so.
+        # It will be instanced iteratively when parsing the json (ie. subdicts
+        # will also be instanced as PrettyDict)
+        self.report = json.load(
+            self.get_file("reports/report.json"),
+            object_pairs_hook=PrettyDict,
+        )
+
+    def read_malware(self):
+        """Read the archive member "binary" (the submitted sample) into
+        self.malware_binary, and warn when it is empty or missing"""
+        self.malware_binary = self.get_file("binary").read()
+        if not self.malware_binary:
+            # Logger.warn is a deprecated alias of Logger.warning
+            log.warning("No malware binary found")
+
+    def add_sandbox_info(self):
+        info = self.report.get("info", {})
+        if not info:
+            log.warning("The 'info' field was not found "
+                        "in the report, skipping")
+            return False
+
+        o = MISPObject(name='sandbox-report')
+        o.add_attribute('score', info['score'])
+        o.add_attribute('sandbox-type', 'on-premise')
+        o.add_attribute('on-premise-sandbox', 'cuckoo')
+        o.add_attribute('raw-report',
+                        f'started on:{info["machine"]["started_on"]} '
+                        f'duration:{info["duration"]}s '
+                        f'vm:{info["machine"]["name"]}/'
+                        f'{info["machine"]["label"]}')
+        self.event.add_object(o)
+
+    def add_sample(self):
+        """Add the sample/target of the analysis"""
+        target = self.report.get("target", {})
+        category = target.get("category", "")
+        if not category:
+            log.warning("Could not find info about the sample "
+                        "in the report, skipping")
+            return False
+
+        if category == "file":
+            log.debug("Sample is a file, uploading it")
+            self.read_malware()
+            file_o, bin_type_o, bin_section_li = make_binary_objects(
+                pseudofile=io.BytesIO(self.malware_binary),
+                filename=target["file"]["name"],
+            )
+
+            file_o.comment = "Submitted sample"
+            # fix categories
+            for obj in filter(None, (file_o, bin_type_o, *bin_section_li,)):
+                for attr in obj.attributes:
+                    if attr.type in PAYLOAD_DELIVERY:
+                        attr.category = "Payload delivery"
+                self.event.add_object(obj)
+
+        elif category == "url":
+            log.debug("Sample is a URL")
+            o = MISPObject(name='url')
+            o.add_attribute('url', target['url'])
+            o.add_attribute('text', "Submitted URL")
+            self.event.add_object(o)
+
+    def add_http(self):
+        """Add the HTTP requests, one 'http-request' object per request.
+        Return False when the report lists no HTTP request."""
+        # Default to a dict: the former `[]` default has no .get() and made
+        # the next line raise AttributeError when the report has no
+        # 'network' section.
+        network = self.report.get("network", {})
+        http = network.get("http", [])
+        if not http:
+            log.info("No HTTP connection found in the report, skipping")
+            return False
+
+        for request in http:
+            o = MISPObject(name='http-request')
+            o.add_attribute('host', request['host'])
+            o.add_attribute('method', request['method'])
+            o.add_attribute('uri', request['uri'])
+            o.add_attribute('user-agent', request['user-agent'])
+            o.add_attribute('text', f"count:{request['count']} "
+                                    f"port:{request['port']}")
+            self.event.add_object(o)
+
+    def add_network(self, proto=None):
+        """
+        Add UDP/TCP traffic, one 'network-connection' object per distinct
+        (source, source port, destination, destination port) tuple.
+        proto must be one of "tcp", "udp"
+        Return False when the report lists no connection for `proto`.
+        """
+        # Default to a dict: the former `[]` default has no .get() and made
+        # the next line raise AttributeError when the report has no
+        # 'network' section.
+        network = self.report.get("network", {})
+        li_conn = network.get(proto, [])
+        if not li_conn:
+            log.info(f"No {proto} connection found in the report, skipping")
+            return False
+
+        # a set gives constant-time duplicate detection
+        seen = set()
+        # sort by time to get the "first packet seen" right; sorted() leaves
+        # the report itself untouched
+        for conn in sorted(li_conn, key=lambda x: x["time"]):
+            src = conn['src']
+            dst = conn['dst']
+            sport = conn['sport']
+            dport = conn['dport']
+            if (src, sport, dst, dport) in seen:
+                continue
+
+            seen.add((src, sport, dst, dport))
+
+            o = MISPObject(name='network-connection')
+            o.add_attribute('ip-src', src)
+            o.add_attribute('ip-dst', dst)
+            o.add_attribute('src-port', sport)
+            o.add_attribute('dst-port', dport)
+            o.add_attribute('layer3-protocol', "IP")
+            o.add_attribute('layer4-protocol', proto.upper())
+            o.add_attribute('first-packet-seen', conn['time'])
+            self.event.add_object(o)
+
+    def add_dns(self):
+        """Add DNS records, one 'dns-record' object per query.
+        Return False when the report lists no DNS query."""
+        # Default to a dict: the former `[]` default has no .get() and made
+        # the next line raise AttributeError when the report has no
+        # 'network' section.
+        network = self.report.get("network", {})
+        dns = network.get("dns", [])
+        if not dns:
+            log.info("No DNS connection found in the report, skipping")
+            return False
+
+        for record in dns:
+            o = MISPObject(name='dns-record')
+            o.add_attribute('text', f"request type:{record['type']}")
+            o.add_attribute('queried-domain', record['request'])
+            for answer in record.get("answers", []):
+                if answer["type"] in ("A", "AAAA"):
+                    o.add_attribute('a-record', answer['data'])
+                # TODO implement MX/NS
+
+            self.event.add_object(o)
+
+    def _get_marks_str(self, marks):
+        """
+        Render the marks of a signature as a list of strings.
+        @ param marks: list of dicts, each with a 'type' key:
+            - "generic": rendered as str() of the mark without its 'type'
+            - "ioc": rendered as the value of its 'ioc' key
+            - "call": rendered as "Call API '<api>' <arguments> <flags>"
+            - any other type is logged and skipped
+        The marks themselves are left unmodified.
+        """
+        marks_strings = []
+        for m in marks:
+            m_type = m["type"]
+
+            if m_type == "generic":
+                # Render a copy without the 'type' key rather than popping
+                # and re-inserting it, which reordered the report's keys and
+                # lost 'type' if an exception occurred in between.
+                details = type(m)(
+                    (key, value) for key, value in m.items() if key != "type"
+                )
+                marks_strings.append(str(details))
+
+            elif m_type == "ioc":
+                marks_strings.append(m['ioc'])
+
+            elif m_type == "call":
+                call = m["call"]
+                arguments = call.get("arguments", {})
+                flags = call.get("flags", {})
+                # a single f-string: mixing it with %-formatting broke as
+                # soon as the API name contained a '%'
+                marks_strings.append(
+                    f"Call API '{call['api']}' {arguments} {flags}"
+                )
+
+            else:
+                # use the module logger, not the root logger
+                log.debug(f"Unknown mark type '{m_type}', skipping")
+
+            # TODO implement marks 'config' and 'volatility'
+        return marks_strings
+
+    def _add_ttp(self, attribute, ttp_short, ttp_num):
+        """
+        Internal wrapper tagging an attribute with a TTP from the MITRE
+        attack-pattern galaxy.
+        @ params
+            - attribute: MISPAttribute
+            - ttp_short: short description of the TTP
+              (eg. "Credential Dumping")
+            - ttp_num: formatted as "T"+int
+              (eg. T1003)
+        """
+        cluster = f"{ttp_short} - {ttp_num}"
+        galaxy_tag = f'misp-galaxy:mitre-attack-pattern="{cluster}"'
+        attribute.add_tag(galaxy_tag)
+
+    def add_signatures(self):
+        """Add the Cuckoo signatures, with as many details as possible
+        regarding the marks"""
+        signatures = self.report.get("signatures", [])
+        if not signatures:
+            log.info("No signature found in the report")
+            return False
+
+        o = MISPObject(name='sb-signature')
+        o.add_attribute('software', "Cuckoo")
+
+        for sign in signatures:
+            marks = sign["marks"]
+            marks_strings = self._get_marks_str(marks)
+            summary = sign['description']
+            if marks_strings:
+                summary += "\n---\n"
+
+            marks_strings = set(marks_strings)
+            description = summary + "\n".join(marks_strings)
+
+            a = MISPAttribute()
+            a.from_dict(type='text', value=description)
+            for ttp_num, desc in sign.get("ttp", {}).items():
+                ttp_short = desc["short"]
+                self._add_ttp(a, ttp_short, ttp_num)
+
+            # this signature was triggered by the processes with the following
+            # PIDs, we can create references
+            triggered_by_pids = filter(
+                None,
+                (m.get("pid", None) for m in marks)
+            )
+            # remove redundancy
+            triggered_by_pids = set(triggered_by_pids)
+            for pid in triggered_by_pids:
+                process_o = find_process_by_pid(self.event, pid)
+                if process_o:
+                    process_o.add_reference(a, "triggers")
+
+            o.add_attribute('signature', **a)
+
+        self.event.add_object(o)
+
+    def _handle_process(self, proc, accu):
+        """
+        This is an internal recursive function to handle one process
+        from a process tree and then iterate on its children.
+        List the objects to be added, based on the tree, into the `accu` list.
+        The `accu` list uses a DFS-like order.
+        """
+        o = MISPObject(name='process')
+        accu.append(o)
+        o.add_attribute('pid', proc['pid'])
+        o.add_attribute('command-line', proc['command_line'])
+        o.add_attribute('name', proc['process_name'])
+        o.add_attribute('parent-pid', proc['ppid'])
+        for child in proc.get('children', []):
+            pos_child = len(accu)
+            o.add_attribute('child-pid', child['pid'])
+            self._handle_process(child, accu)
+            child_obj = accu[pos_child]
+            child_obj.add_reference(o, 'child-of')
+
+        return o
+
+    def add_process_tree(self):
+        """Add every process of the report's process tree to the event as
+        a separate process object. Return False when there is no tree."""
+        tree = self.report.get("behavior", {}).get("processtree", [])
+        if not tree:
+            log.warning("No process tree found in the report, skipping")
+            return False
+
+        for root in tree:
+            # _handle_process fills the list with the root and its
+            # descendants
+            collected = []
+            self._handle_process(root, collected)
+            for process_object in collected:
+                self.event.add_object(process_object)
+
+    def get_relpath(self, path):
+        """
+        Turn an absolute or relative path into a path relative to the
+        cuckoo analysis directory, without knowing the cuckoo working
+        directory, by matching it against the names in self.files.
+        Return an empty string if the path given does not refer to a
+        file from the analysis directory.
+        """
+        # Peel the components off the end of the path, stopping at the
+        # first empty one (root reached, or the path ends with a slash).
+        components = []
+        remaining = path
+        while True:
+            remaining, component = posixpath.split(remaining)
+            if not component:
+                break
+            components.append(component)
+
+        # Walk back from the outermost component, extending the relative
+        # path while it names an archive member and restarting otherwise.
+        relpath = ""
+        for component in reversed(components):
+            candidate = posixpath.join(relpath, component)
+            if candidate in self.files:
+                relpath = candidate
+            elif component in self.files:
+                relpath = component
+            else:
+                relpath = ""
+        return relpath
+
+    def add_screenshots(self):
+        """Add the screenshots taken by Cuckoo in a sandbox-report object"""
+        screenshots = self.report.get('screenshots', [])
+        if not screenshots:
+            log.info("No screenshot found in the report, skipping")
+            return False
+
+        o = MISPObject(name='sandbox-report')
+        o.add_attribute('sandbox-type', 'on-premise')
+        o.add_attribute('on-premise-sandbox', "cuckoo")
+        for shot in screenshots:
+            # The path given by Cuckoo is an absolute path, but we need a path
+            # relative to the analysis folder.
+            path = self.get_relpath(shot['path'])
+            img = self.get_file(path)
+            # .decode('utf-8') in order to avoid the b'' format
+            img_data = base64.b64encode(img.read()).decode('utf-8')
+            filename = posixpath.basename(path)
+
+            o.add_attribute(
+                "sandbox-file", value=filename,
+                data=img_data, type='attachment',
+                category="External analysis",
+            )
+
+        self.event.add_object(o)
+
+    def _get_dropped_objs(self, path, filename=None, comment=None):
+        """
+        Internal wrapper to get dropped files/buffers as file objects
+        @ params
+            - path: relative to the cuckoo analysis directory
+            - filename: if not specified, deduced from the path
+        """
+        if not filename:
+            filename = posixpath.basename(path)
+
+        dropped_file = self.get_file(path)
+        dropped_binary = io.BytesIO(dropped_file.read())
+        # create ad hoc objects
+        file_o, bin_type_o, bin_section_li = make_binary_objects(
+            pseudofile=dropped_binary, filename=filename,
+        )
+
+        if comment:
+            file_o.comment = comment
+        # fix categories
+        for obj in filter(None, (file_o, bin_type_o, *bin_section_li,)):
+            for attr in obj.attributes:
+                if attr.type in ARTIFACTS_DROPPED:
+                    attr.category = "Artifacts dropped"
+
+        return file_o, bin_type_o, bin_section_li
+
+    def _add_yara(self, obj, yara_dict):
+        """Internal wrapper adding one 'text' attribute per Yara match to a
+        MISPObject. `yara_dict` is iterated as a sequence of match dicts."""
+        for match in yara_dict:
+            rule_name = match.get("name", "")
+            meta = match.get("meta", {})
+            rule_description = meta.get("description", "")
+            text = (f"Yara match\n(name) {rule_name}\n"
+                    f"(description) {rule_description}")
+            obj.add_attribute("text", text, comment="Yara match")
+
+    @staticmethod
+    def _restore_dropped_filename(filename, original_path, sha256):
+        """Return the original (case-preserved) name of a dropped file, or
+        None when the report's values do not follow the expected format.
+        @ params
+            - filename: name from the report, expected to be formatted as
+              <beginning of the SHA256>_<original name in lower case>
+            - original_path: path of the file on the VM
+            - sha256: SHA256 of the file, from the report
+        """
+        # Split on the first underscore only: the original name may itself
+        # contain underscores.
+        hash_prefix, separator, original_name = filename.partition("_")
+        if not separator or not original_name:
+            log.debug("Cannot restore filename: unexpected filename read "
+                      "in the report")
+            return None
+
+        # check our assumptions are valid, if so we can safely
+        # restore the filename, if not the format may have changed
+        # so we'll keep the filename of the report
+        lowered_path = original_path.lower()
+        if not (sha256.startswith(hash_prefix)
+                and lowered_path.endswith(original_name)
+                and filename not in lowered_path):
+            log.debug("Cannot restore filename: our assumptions were wrong, "
+                      "filename format may have changed")
+            return None
+
+        # we can restore the original case of the filename
+        return original_path[lowered_path.rindex(original_name):]
+
+    def add_dropped_files(self):
+        """Upload the dropped files as file objects.
+        Return False when the report lists no dropped file."""
+        dropped = self.report.get("dropped", [])
+        if not dropped:
+            log.info("No dropped file found, skipping")
+            return False
+
+        for d in dropped:
+            # Cuckoo logs three things that are of interest for us:
+            #   - 'filename' which is not the original name of the file
+            #     but is formatted as follow:
+            #        8 first bytes of SHA256 + _ + original name in lower case
+            #   - 'filepath' which is the original filepath on the VM,
+            #     where the file was dropped
+            #   - 'path' which is the local path of the stored file,
+            #     in the cuckoo archive
+            filename = d.get("name", "")
+            original_path = d.get("filepath", "")
+            sha256 = d.get("sha256", "")
+            if original_path and sha256:
+                log.debug(f"Will now try to restore original filename from "
+                          f"path {original_path}")
+                restored = self._restore_dropped_filename(
+                    filename, original_path, sha256)
+                if restored:
+                    filename = restored
+                    log.debug(f"Successfully restored original filename: "
+                              f"{filename}")
+
+            if not filename:
+                filename = "NO NAME FOUND IN THE REPORT"
+                log.warning(f'No filename found for dropped file! '
+                            f'Will use "{filename}"')
+
+            file_o, bin_type_o, bin_section_o = self._get_dropped_objs(
+                self.get_relpath(d['path']),
+                filename=filename,
+                comment="Dropped file"
+            )
+
+            self._add_yara(file_o, d.get("yara", []))
+
+            file_o.add_attribute("fullpath", original_path,
+                                 category="Artifacts dropped")
+
+            # why is this a list? for when various programs drop the same file?
+            for pid in d.get("pids", []):
+                # if we have an object for the process that dropped the file,
+                # we can link the two (we just take the first result from
+                # the search)
+                process_o = find_process_by_pid(self.event, pid)
+                if process_o:
+                    file_o.add_reference(process_o, "dropped-by")
+
+            self.event.add_object(file_o)
+
+    def add_dropped_buffers(self):
+        """Upload the dropped buffers as file objects.
+        Return False when the report lists no buffer."""
+        dropped_buffers = self.report.get("buffer", [])
+        if not dropped_buffers:
+            log.info("No dropped buffer found, skipping")
+            return False
+
+        for index, dropped in enumerate(dropped_buffers):
+            relpath = self.get_relpath(dropped['path'])
+            # only the file object is used; buffers are named by position
+            file_o, _, _ = self._get_dropped_objs(
+                relpath, filename=f"buffer {index}", comment="Dropped buffer",
+            )
+            self._add_yara(file_o, dropped.get("yara", []))
+            self.event.add_object(file_o)
+
+    def parse(self):
+        """Run the parsing.
+
+        For every entry of ``self.config`` whose value is truthy, looks up
+        the entry of the same name in ``self.options`` and calls its
+        "method" callable.
+        """
+        for name, active in self.config.items():
+            if active:
+                # the stored callable is invoked with self passed explicitly
+                self.options[name]["method"](self)
+
+    def get_misp_event(self):
+        """Run the expansions on ``self.event`` and return the event."""
+        log.debug("Running MISP expansions")
+        self.event.run_expansions()
+        return self.event
+

 def handler(q=False):
+    """Handle one request passed as a JSON string.
+
+    Decodes ``q``, feeds its 'data' entry to a CuckooParser built from its
+    'config' entry, runs the parser and serialises the resulting event.
+
+    Returns False when ``q`` is False; otherwise a dict with a single
+    'results' key holding the event's 'Attribute' and 'Object' entries,
+    each included only when present and non-empty.
+    """
-    # Just in case we have no data
+    # In case there's no data
    if q is False:
        return False

-    # The return value
-    r = {'results': []}
-
-    # Load up that JSON
    q = json.loads(q)
-    data = base64.b64decode(q.get("data")).decode('utf-8')
+    # NOTE(review): 'data' (here) and 'config' (below) are read without
+    # defaults, so a request missing either key raises KeyError
+    data = q['data']

-    # If something really weird happened
-    if not data:
-        return json.dumps({"success": 0})
+    parser = CuckooParser(q['config'])
+    parser.read_archive(data)
+    parser.parse()
+    event = parser.get_misp_event()

-    data = json.loads(data)
-
-    # Get characteristics of file
-    targetFile = data['target']['file']
-
-    # Process the inital binary
-    processBinary(r, targetFile, initial=True)
-
-    # Get binary information for dropped files
-    if(data.get('dropped')):
-        for droppedFile in data['dropped']:
-            processBinary(r, droppedFile, dropped=True)
-
-    # Add malscore to results
-    r["results"].append({
-        "values": "Malscore: {} ".format(data['malscore']),
-        "types": "comment",
-        "categories": "Payload delivery",
-        "comment": "Cuckoo analysis: MalScore"
-    })
-
-    # Add virustotal data, if exists
-    if(data.get('virustotal')):
-        processVT(r, data['virustotal'])
-
-    # Add network information, should be improved
-    processNetwork(r, data['network'])
-
-    # Add behavioral information
-    processSummary(r, data['behavior']['summary'])
-
-    # Return
-    return r
-
-
-def processSummary(r, summary):
-    r["results"].append({
-        "values": summary['mutexes'],
-        "types": "mutex",
-        "categories": "Artifacts dropped",
-        "comment": "Cuckoo analysis: Observed mutexes"
-    })
-
-
-def processVT(r, virustotal):
-    category = "Antivirus detection"
-    comment = "VirusTotal analysis"
-
-    if(virustotal.get('permalink')):
-        r["results"].append({
-            "values": virustotal['permalink'],
-            "types": "link",
-            "categories": category,
-            "comments": comment + " - Permalink"
-        })
-
-    if(virustotal.get('total')):
-        r["results"].append({
-            "values": "VirusTotal detection rate {}/{}".format(
-                virustotal['positives'],
-                virustotal['total']
-            ),
-            "types": "comment",
-            "categories": category,
-            "comment": comment
-        })
-    else:
-        r["results"].append({
-            "values": "Sample not detected on VirusTotal",
-            "types": "comment",
-            "categories": category,
-            "comment": comment
-        })
-
-
-def processNetwork(r, network):
-    category = "Network activity"
-
-    for host in network['hosts']:
-        r["results"].append({
-            "values": host['ip'],
-            "types": "ip-dst",
-            "categories": category,
-            "comment": "Cuckoo analysis: Observed network traffic"
-        })
-
-
-def processBinary(r, target, initial=False, dropped=False):
-    if(initial):
-        comment = "Cuckoo analysis: Initial file"
-        category = "Payload delivery"
-    elif(dropped):
-        category = "Artifacts dropped"
-        comment = "Cuckoo analysis: Dropped file"
-
-    r["results"].append({
-        "values": target['name'],
-        "types": "filename",
-        "categories": category,
-        "comment": comment
-    })
-
-    r["results"].append({
-        "values": target['md5'],
-        "types": "md5",
-        "categories": category,
-        "comment": comment
-    })
-
-    r["results"].append({
-        "values": target['sha1'],
-        "types": "sha1",
-        "categories": category,
-        "comment": comment
-    })
-
-    r["results"].append({
-        "values": target['sha256'],
-        "types": "sha256",
-        "categories": category,
-        "comment": comment
-    })
-
-    r["results"].append({
-        "values": target['sha512'],
-        "types": "sha512",
-        "categories": category,
-        "comment": comment
-    })
-
-    # todo : add file size?
-
-    if(target.get('guest_paths')):
-        r["results"].append({
-            "values": target['guest_paths'],
-            "types": "filename",
-            "categories": "Payload installation",
-            "comment": comment + " - Path"
-        })
+    # round-trip through JSON so the event can be indexed by key below
+    event = json.loads(event.to_json())
+    results = {
+        key: event[key]
+        for key in ('Attribute', 'Object')
+        if (key in event and event[key])
+    }
+    return {'results': results}


 def introspection():
+    """Return ``mispattributes`` extended with the parser's user options.
+
+    Builds a mapping from each key of ``CuckooParser.options`` to that
+    option's "userConfig" value and stores it under 'userConfig'.
+    """
-    modulesetup = {}
-    try:
-        userConfig
-        modulesetup['userConfig'] = userConfig
-    except NameError:
-        pass
-    try:
-        inputSource
-        modulesetup['inputSource'] = inputSource
-    except NameError:
-        pass
-    return modulesetup
+    userConfig = {
+        key: o["userConfig"]
+        for key, o in CuckooParser.options.items()
+    }
+    # mispattributes is updated in place, not copied, before being returned
+    mispattributes['userConfig'] = userConfig
+    return mispattributes


 def version():
+    """Return ``moduleinfo`` after storing ``moduleconfig`` under 'config'."""
    moduleinfo['config'] = moduleconfig
    return moduleinfo
-
-
-if __name__ == '__main__':
-    x = open('test.json', 'r')
-    q = []
-    q['data'] = x.read()
-    q = base64.base64encode(q)
-
-    handler(q)
--- a/tests/test_expansions.py
+++ b/tests/test_expansions.py
@ -43,7 +43,7 @@ class TestExpansions(unittest.TestCase):
        query = {"module": "hibp", "email-src": "info@circl.lu"}
        response = self.misp_modules_post(query)
        to_check = self.get_values(response)
-        if to_check == "haveibeenpwned.com API not accessible (HTTP 403)":
+        if to_check == "haveibeenpwned.com API not accessible (HTTP 401)":
            self.skipTest(f"haveibeenpwned blocks travis IPs: {response}")
        self.assertEqual(to_check, 'OK (Not Found)', response)