Merge remote-tracking branch 'refs/remotes/CIRCL/master'

2015-12-08 16:18:59 +00:00 · 2015-12-08 16:18:59 +00:00 · 3d43c32808
parent ae1fed5ff2 ed23b84dcc
commit 3d43c32808
28 changed files with 1535 additions and 21 deletions
--- a/README_filecheck.md
+++ b/README_filecheck.md
@ -0,0 +1,86 @@
+Install Qemu and Expect
+============
+
+Install the necessary packages:
+
+```
+    sudo apt-get install qemu qemu-user-static expect
+```
+
+Create a new image from scratch
+===============================
+
+* Download the most recent Raspbian version:
+    http://downloads.raspberrypi.org/raspbian_latest
+
+* Unpack it:
+
+```
+    unzip 2015-05-05-raspbian-wheezy.zip
+    mv 2015-05-05-raspbian-wheezy.img raspbian-wheezy.img
+```
+
+Prepare the image
+=================
+
+It will be used for the build environment and the final image.
+
+* [Add empty space to the image](resize_img.md)
+
+* Chroot in the image
+
+```
+    sudo ./proper_chroot.sh
+```
+
+* Change your user to root (your global variables may be broken)
+
+```
+    su root
+```
+
+* The locales may be broken, fix it (remove `en_GB.UTF-8 UTF-8`, set `en_US.UTF-8 UTF-8`):
+
+```
+    dpkg-reconfigure locales
+```
+
+* In the image, make sure everything is up-to-date, and remove the old packages
+
+```
+    apt-get update
+    apt-get dist-upgrade
+    apt-get autoremove
+    apt-get install p7zip-full python-dev libxml2-dev libxslt1-dev pmount
+```
+
+* Install python requirements
+
+```
+    pip install lxml
+    pip install oletools olefile
+    pip install officedissector
+    pip install git+https://github.com/Rafiot/python-magic.git@travis
+    pip install git+https://github.com/CIRCL/PyCIRCLean.git
+```
+
+* Create the user and mtab for a RO filesystem
+
+```
+    useradd -m kitten
+    chown -R kitten:kitten /home/kitten
+    ln -s /proc/mounts /etc/mtab
+```
+
+* Copy the files
+
+```
+    sudo ./copy_to_final.sh /mnt/arm_rPi/
+```
+
+* Enable rc.local
+
+```
+    systemctl enable rc-local.service
+```
+
--- a/README_initial_setup.md
+++ b/README_initial_setup.md
@ -16,7 +16,8 @@ Create a new image from scratch
 * Unpack it:

 ```
-    unzip 2015-02-16-raspbian-wheezy.zip
+    unzip 2015-05-05-raspbian-wheezy.zip
+    mv 2015-05-05-raspbian-wheezy.img raspbian-wheezy.img
 ```

 Prepare the base image
@ -26,12 +27,6 @@ It will be used for the build environment and the final image.

 * [Add empty space to the image](resize_img.md)

-* Edit `mount_image.sh` and change the `IMAGE` variable accordingly
-
-```
-    IMAGE='2015-02-16-raspbian-wheezy.img'
-```
-
 * Chroot in the image

 ```
@ -64,8 +59,8 @@ Setup two images
 * Create two separate images: one will be used to build the deb packages that are not available in wheezy

 ```
-    mv 2015-02-16-raspbian-wheezy.img BUILDENV_2015-02-16-raspbian-wheezy.img
-    cp BUILDENV_2015-02-16-raspbian-wheezy.img FINAL_2015-02-16-raspbian-wheezy.img
+    mv raspbian-wheezy.img BUILDENV-raspbian-wheezy.img
+    cp BUILDENV-raspbian-wheezy.img FINAL-raspbian-wheezy.img
 ```

 Build environment specifics
@ -74,7 +69,7 @@ Build environment specifics
 * Create a symlink to the build image

 ```
-    ln -s  BUILDENV_2015-02-16-raspbian-wheezy.img 2015-02-16-raspbian-wheezy.img
+    ln -s  BUILDENV-raspbian-wheezy.img raspbian-wheezy.img
 ```

 * Chroot in the image
@ -147,8 +142,8 @@ Final image specifics
 * Change the link to the image

 ```
-   rm 2015-02-16-raspbian-wheezy.img
-   ln -s FINAL_2015-02-16-raspbian-wheezy.img 2015-02-16-raspbian-wheezy.img
+   rm raspbian-wheezy.img
+   ln -s FINAL-raspbian-wheezy.img raspbian-wheezy.img
 ```

 * Chroot in the image
@ -210,7 +205,7 @@ Write the image on a SD card
 *WARNING*: Make sure you write on the right filesystem

 ```
-    sudo dd bs=4M if=FINAL_2015-02-16-raspbian-wheezy.img of=/dev/<FILESYSTEM>
+    sudo dd bs=4M if=FINAL-raspbian-wheezy.img of=/dev/<FILESYSTEM>
 ```

 Run the tests
--- a/copy_to_final.sh
+++ b/copy_to_final.sh
@ -19,7 +19,7 @@ fi
 #cp deb/*.deb ${CHROOT_PATH}/

 # prepare fs archive
-tar -cvpzf backup.tar.gz -C fs/ .
+tar -cvpzf backup.tar.gz -C fs_filecheck/ .
 tar -xzf backup.tar.gz -C ${CHROOT_PATH}/
 chown root:root ${CHROOT_PATH}/etc/sudoers
 if [ -f deb/led ]; then
--- a/fs/etc/udev/rules.d/50-blockhid.rules
+++ b/fs/etc/udev/rules.d/50-blockhid.rules
@ -0,0 +1 @@
+SUBSYSTEM=="hidraw", DRIVERS=="usbhid", RUN+="/bin/bash -c 'cd /sys/devices/platform/bcm2708_usb/usb1/1-1 ; for d in $(ls -d 1-1.[2-5]); do if [ $(ls -lR $d | grep -c usbhid) -gt 0 ] ; then echo 0 > $d/authorized ; fi ; done'"
--- a/fs_filecheck/etc/fstab
+++ b/fs_filecheck/etc/fstab
@ -0,0 +1,6 @@
+proc            /proc           proc    defaults          0       0
+/dev/mmcblk0p1  /boot           vfat    ro,defaults          0       0
+/dev/mmcblk0p2  /               ext4    ro,defaults,noatime  0       0
+tmpfs   /tmp    tmpfs  rw,size=64M,noexec,nodev,nosuid,mode=1777   0  0
+tmpfs   /media  tmpfs  rw,size=64M,noexec,nodev,nosuid,mode=1777   0  0
+# a swapfile is not a swap partition, so no using swapon|off from here on, use  dphys-swapfile swap[on|off]  for that
--- a/fs_filecheck/etc/group
+++ b/fs_filecheck/etc/group
@ -0,0 +1,54 @@
+root:x:0:
+daemon:x:1:
+bin:x:2:
+sys:x:3:
+adm:x:4:pi
+tty:x:5:
+disk:x:6:
+lp:x:7:
+mail:x:8:
+news:x:9:
+uucp:x:10:
+man:x:12:
+proxy:x:13:
+kmem:x:15:
+dialout:x:20:pi
+fax:x:21:
+voice:x:22:
+cdrom:x:24:pi
+floppy:x:25:
+tape:x:26:
+sudo:x:27:pi
+audio:x:29:pi
+dip:x:30:
+www-data:x:33:
+backup:x:34:
+operator:x:37:
+list:x:38:
+irc:x:39:
+src:x:40:
+gnats:x:41:
+shadow:x:42:
+utmp:x:43:
+video:x:44:pi
+sasl:x:45:
+plugdev:x:46:pi,kitten
+staff:x:50:
+games:x:60:pi
+users:x:100:pi
+nogroup:x:65534:
+libuuid:x:101:
+crontab:x:102:
+pi:x:1000:
+ssh:x:103:
+ntp:x:104:
+netdev:x:105:pi
+input:x:999:pi
+messagebus:x:106:
+lpadmin:x:107:
+fuse:x:108:
+lightdm:x:109:
+indiecity:x:1001:root
+spi:x:1002:pi
+gpio:x:1003:pi
+kitten:x:1004:
--- a/fs_filecheck/etc/pam.d/su
+++ b/fs_filecheck/etc/pam.d/su
@ -0,0 +1,61 @@
+#
+# The PAM configuration file for the Shadow `su' service
+#
+
+# This allows root to su without passwords (normal operation)
+auth       sufficient pam_rootok.so
+
+# Uncomment this to force users to be a member of group root
+# before they can use `su'. You can also add "group=foo"
+# to the end of this line if you want to use a group other
+# than the default "root" (but this may have side effect of
+# denying "root" user, unless she's a member of "foo" or explicitly
+# permitted earlier by e.g. "sufficient pam_rootok.so").
+# (Replaces the `SU_WHEEL_ONLY' option from login.defs)
+# auth       required   pam_wheel.so
+
+# Uncomment this if you want wheel members to be able to
+# su without a password.
+# auth       sufficient pam_wheel.so trust
+
+# Uncomment this if you want members of a specific group to not
+# be allowed to use su at all.
+# auth       required   pam_wheel.so deny group=nosu
+
+# Uncomment and edit /etc/security/time.conf if you need to set
+# time restraints on su usage.
+# (Replaces the `PORTTIME_CHECKS_ENAB' option from login.defs
+# as well as /etc/porttime)
+# account    requisite  pam_time.so
+
+# This module parses environment configuration file(s)
+# and also allows you to use an extended config
+# file /etc/security/pam_env.conf.
+#
+# parsing /etc/environment needs "readenv=1"
+session       required   pam_env.so readenv=1
+# locale variables are also kept into /etc/default/locale in etch
+# reading this file *in addition to /etc/environment* does not hurt
+session       required   pam_env.so readenv=1 envfile=/etc/default/locale
+
+# Defines the MAIL environment variable
+# However, userdel also needs MAIL_DIR and MAIL_FILE variables
+# in /etc/login.defs to make sure that removing a user
+# also removes the user's mail spool file.
+# See comments in /etc/login.defs
+#
+# "nopen" stands to avoid reporting new mail when su'ing to another user
+session    optional   pam_mail.so nopen
+
+# Sets up user limits according to /etc/security/limits.conf
+# (Replaces the use of /etc/limits in old login)
+#session    required   pam_limits.so
+
+# The standard Unix authentication modules, used with
+# NIS (man nsswitch) as well as normal /etc/passwd and
+# /etc/shadow entries.
+@include common-auth
+@include common-account
+@include common-session
+
+
--- a/fs_filecheck/etc/passwd
+++ b/fs_filecheck/etc/passwd
@ -0,0 +1,30 @@
+root:x:0:0:root:/root:/bin/bash
+daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
+bin:x:2:2:bin:/bin:/usr/sbin/nologin
+sys:x:3:3:sys:/dev:/usr/sbin/nologin
+sync:x:4:65534:sync:/bin:/bin/sync
+games:x:5:60:games:/usr/games:/usr/sbin/nologin
+man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
+lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
+mail:x:8:8:mail:/var/mail:/usr/sbin/nologin
+news:x:9:9:news:/var/spool/news:/usr/sbin/nologin
+uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin
+proxy:x:13:13:proxy:/bin:/usr/sbin/nologin
+www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin
+backup:x:34:34:backup:/var/backups:/usr/sbin/nologin
+list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin
+irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin
+gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin
+nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
+systemd-timesync:x:100:103:systemd Time Synchronization,,,:/run/systemd:/bin/false
+systemd-network:x:101:104:systemd Network Management,,,:/run/systemd/netif:/bin/false
+systemd-resolve:x:102:105:systemd Resolver,,,:/run/systemd/resolve:/bin/false
+systemd-bus-proxy:x:103:106:systemd Bus Proxy,,,:/run/systemd:/bin/false
+pi:x:1000:1000:,,,:/home/pi:/bin/false
+sshd:x:104:65534::/var/run/sshd:/usr/sbin/nologin
+messagebus:x:105:110::/var/run/dbus:/bin/false
+avahi:x:106:111:Avahi mDNS daemon,,,:/var/run/avahi-daemon:/bin/false
+ntp:x:107:112::/home/ntp:/bin/false
+statd:x:108:65534::/var/lib/nfs:/bin/false
+lightdm:x:109:114:Light Display Manager:/var/lib/lightdm:/bin/false
+kitten:x:1001:1004::/home/kitten:/bin/bash
--- a/fs_filecheck/etc/pmount.allow
+++ b/fs_filecheck/etc/pmount.allow
@ -0,0 +1,5 @@
+# /etc/pmount.allow
+# pmount will allow users to additionally mount all devices that are
+# listed here.
+/dev/sdb1
+/dev/sda*
--- a/fs_filecheck/etc/profile.d/raspi-config.sh
+++ b/fs_filecheck/etc/profile.d/raspi-config.sh
@ -0,0 +1,19 @@
+#!/bin/sh
+# Part of raspi-config http://github.com/asb/raspi-config
+#
+# See LICENSE file for copyright and license details
+
+# Should be installed to /etc/profile.d/raspi-config.sh to force raspi-config
+# to run at initial login
+
+# You may also want to set automatic login in /etc/inittab on tty1 by adding a
+# line such as:
+# 1:2345:respawn:/bin/login -f root tty1 </dev/tty1 >/dev/tty1 2>&1 # RPICFG_TO_DISABLE
+
+# Non-root logins only get a notice; a root login shell is replaced (exec)
+# by a pre-authenticated login as user "pi" instead of starting raspi-config.
+if [ $(id -u) -ne 0 ]; then
+    printf "\nNOTICE: the software on this Raspberry Pi has not been fully configured. Please run 'sudo raspi-config'\n\n"
+else
+    # Disable raspi-config at the first run.
+    # raspi-config
+    # NOTE(review): the passwd file shipped next to this script gives "pi"
+    # the shell /bin/false, so this presumably ends the session right away
+    # - confirm that this is the intended way to lock the console.
+    exec login -f pi
+fi
--- a/fs_filecheck/etc/rc.local
+++ b/fs_filecheck/etc/rc.local
@ -0,0 +1,36 @@
+#!/bin/sh -e
+#
+# rc.local
+#
+# This script is executed at the end of each multiuser runlevel.
+# Make sure that the script will "exit 0" on success or any other
+# value on error.
+#
+# In order to enable or disable this script just change the execution
+# bits.
+#
+# When both /dev/sda and /dev/sdb exist, this script takes the network
+# interface down, runs /opt/groomer/init.sh and powers the machine off
+# when it is done. Otherwise it only prints the IP address.
+
+# Exit/signal handler: announce the end of the run and power off.
+# It is only installed once both USB devices have been detected (see below).
+clean(){
+    echo 'Rc Local done, quit.'
+    /sbin/shutdown -P -h now
+}
+
+# Print the IP address
+# "|| true" keeps a failing hostname call from aborting the script (sh -e).
+_IP=$(hostname -I) || true
+if [ "$_IP" ]; then
+  printf "My IP address is %s\n" "$_IP"
+fi
+
+# Only start the groomer when both /dev/sda and /dev/sdb are present.
+if [ -e /dev/sda ]; then
+  if [ -e /dev/sdb ]; then
+    # avoid possible misuse
+    /sbin/ifconfig eth0 down
+    # From here on every way out of this script (normal end, failure under
+    # -e, TERM, INT) runs clean and therefore shuts the machine down.
+    trap clean EXIT TERM INT
+    # init.sh sources ./constraint.sh, so it must be started from its directory.
+    cd /opt/groomer
+    # Started in the background; presumably drives the status LED - TODO confirm.
+    /usr/sbin/led &
+    ./init.sh
+  fi
+fi
+
+exit 0
--- a/fs_filecheck/etc/security/limits.conf
+++ b/fs_filecheck/etc/security/limits.conf
@ -0,0 +1 @@
+kitten hard priority -20
--- a/fs_filecheck/etc/sudoers
+++ b/fs_filecheck/etc/sudoers
@ -0,0 +1,28 @@
+#
+# This file MUST be edited with the 'visudo' command as root.
+#
+# Please consider adding local content in /etc/sudoers.d/ instead of
+# directly modifying this file.
+#
+# See the man page for details on how to write a sudoers file.
+#
+Defaults	env_reset
+Defaults	mail_badpass
+Defaults	secure_path="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+
+# Host alias specification
+
+# User alias specification
+
+# Cmnd alias specification
+
+# User privilege specification
+#root	ALL=(ALL:ALL) ALL
+
+# Allow members of group sudo to execute any command
+#%sudo	ALL=(ALL:ALL) ALL
+
+# See sudoers(5) for more information on "#include" directives:
+
+#includedir /etc/sudoers.d
+#pi ALL=(ALL) NOPASSWD: ALL
--- a/fs_filecheck/etc/systemd/system/rc-local.service
+++ b/fs_filecheck/etc/systemd/system/rc-local.service
@ -0,0 +1,12 @@
+[Unit]
+Description=/etc/rc.local Compatibility
+
+[Service]
+Type=oneshot
+ExecStart=/etc/rc.local
+TimeoutSec=0
+StandardInput=tty
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
--- a/fs_filecheck/etc/udev/rules.d/50-blockhid.rules
+++ b/fs_filecheck/etc/udev/rules.d/50-blockhid.rules
@ -0,0 +1 @@
+SUBSYSTEM=="hidraw", DRIVERS=="usbhid", RUN+="/bin/bash -c 'cd /sys/devices/platform/bcm2708_usb/usb1/1-1 ; for d in $(ls -d 1-1.[2-5]); do if [ $(ls -lR $d | grep -c usbhid) -gt 0 ] ; then echo 0 > $d/authorized ; fi ; done'"
--- a/fs_filecheck/etc/udev/rules.d/90-qemu.rules
+++ b/fs_filecheck/etc/udev/rules.d/90-qemu.rules
@ -0,0 +1,2 @@
+KERNEL=="sdc", SYMLINK+="mmcblk0"
+KERNEL=="sdc?", SYMLINK+="mmcblk0p%n",
--- a/fs_filecheck/opt/groomer/constraint.sh
+++ b/fs_filecheck/opt/groomer/constraint.sh
@ -0,0 +1,23 @@
+DEV_SRC='/dev/sda'
+DEV_DST='sdb1'
+
+# User allowed to do the following commands without password
+USERNAME='kitten'
+MUSIC="/opt/midi/"
+
+ID=`/usr/bin/id -u`
+
+# Paths used in multiple scripts
+SRC="src"
+DST="dst"
+TEMP="/media/${DST}/temp"
+ZIPTEMP="/media/${DST}/ziptemp"
+LOGS="/media/${DST}/logs"
+
+
+# commands
+SYNC='/bin/sync'
+TIMIDITY='/usr/bin/timidity'
+MOUNT='/bin/mount'
+PMOUNT='/usr/bin/pmount -A -s'
+PUMOUNT='/usr/bin/pumount'
--- a/fs_filecheck/opt/groomer/groomer.sh
+++ b/fs_filecheck/opt/groomer/groomer.sh
@ -0,0 +1,112 @@
+#!/bin/bash
+
+set -e
+set -x
+
+source ./constraint.sh
+if ! [ "${ID}" -ge "1000" ]; then
+    echo "This script cannot run as root."
+    exit
+fi
+
+clean(){
+    echo Cleaning.
+    ${SYNC}
+
+    # Cleanup source
+    pumount ${SRC}
+
+    # Cleanup destination
+    rm -rf ${TEMP}
+    rm -rf ${ZIPTEMP}
+    pumount ${DST}
+
+    exit
+}
+
+trap clean EXIT TERM INT
+
+# De we have a source device
+if [ ! -b ${DEV_SRC} ]; then
+    echo "Source device (${DEV_SRC}) does not exists."
+    exit
+fi
+# Find the partition names on the source device
+DEV_PARTITIONS=`ls "${DEV_SRC}"* | grep "${DEV_SRC}[1-9][0-6]*" || true`
+if [ -z "${DEV_PARTITIONS}" ]; then
+    echo "${DEV_SRC} does not have any partitions."
+    exit
+fi
+
+# Do we have a destination device
+if [ ! -b "/dev/${DEV_DST}" ]; then
+    echo "Destination device (/dev/${DEV_DST}) does not exists."
+    exit
+fi
+
+# mount and prepare destination device
+if ${MOUNT}|grep ${DST}; then
+    ${PUMOUNT} ${DST} || true
+fi
+# uid= only works on a vfat FS. What should wedo if we get an ext* FS ?
+${PMOUNT} -w ${DEV_DST} ${DST}
+if [ ${?} -ne 0 ]; then
+    echo "Unable to mount /dev/${DEV_DST} on /media/${DST}"
+    exit
+else
+    echo "Target USB device (/dev/${DEV_DST}) mounted at /media/${DST}"
+    rm -rf "/media/${DST}/FROM_PARTITION_"*
+
+    # prepare temp dirs and make sure it's empty
+    mkdir -p "${TEMP}"
+    mkdir -p "${ZIPTEMP}"
+    mkdir -p "${LOGS}"
+
+    rm -rf "${TEMP}/"*
+    rm -rf "${ZIPTEMP}/"*
+    rm -rf "${LOGS}/"*
+fi
+
+# Groom da kitteh!
+
+# Find the FS types
+# lsblk -n -o name,fstype,mountpoint,label,uuid -r
+
+PARTCOUNT=1
+for partition in ${DEV_PARTITIONS}
+do
+    # Processing a partition
+    echo "Processing partition: ${partition}"
+    if [ `${MOUNT} | grep -c ${SRC}` -ne 0 ]; then
+        ${PUMOUNT} ${SRC}
+    fi
+
+    ${PMOUNT} -w ${partition} ${SRC}
+    ls "/media/${SRC}" | grep -i autorun.inf | xargs -I {} mv "/media/${SRC}"/{} "/media/${SRC}"/DANGEROUS_{}_DANGEROUS || true
+    ${PUMOUNT} ${SRC}
+    ${PMOUNT} -r ${partition} ${SRC}
+    if [ ${?} -ne 0 ]; then
+        echo "Unable to mount ${partition} on /media/${SRC}"
+    else
+        echo "${partition} mounted at /media/${SRC}"
+
+        # Print the filenames on the current partition in a logfile
+        find "/media/${SRC}" -fls "${LOGS}/Content_partition_${PARTCOUNT}.txt"
+
+        # create a directory on ${DST} named PARTION_$PARTCOUNT
+        target_dir="/media/${DST}/FROM_PARTITION_${PARTCOUNT}"
+        echo "copying to: ${target_dir}"
+        mkdir -p "${target_dir}"
+        LOGFILE="${LOGS}/processing.txt"
+
+        echo "==== Starting processing of /media/${SRC} to ${target_dir}. ====" >> ${LOGFILE}
+        filecheck.py --source /media/${SRC} --destination ${target_dir} || true
+        echo "==== Done with /media/${SRC} to ${target_dir}. ====" >> ${LOGFILE}
+
+        ls -lR "${target_dir}"
+    fi
+    let PARTCOUNT=`expr $PARTCOUNT + 1`
+done
+
+# The cleanup is automatically done in the function clean called when
+# the program quits
--- a/fs_filecheck/opt/groomer/init.sh
+++ b/fs_filecheck/opt/groomer/init.sh
@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# init.sh - root-side launcher: starts the background music, then runs
+# groomer.sh as the unprivileged user ${USERNAME}. Settings come from
+# ./constraint.sh, so the script must be started from its own directory.
+
+set -e
+set -x
+
+source ./constraint.sh
+
+if [ "${ID}" -ne 0 ]; then
+    echo "This script has to be run as root."
+    exit
+fi
+
+# Exit/signal handler: flush buffers and stop the music player.
+# It can run more than once (signal, then EXIT), so it must tolerate a
+# missing pid file and an already dead player; otherwise "set -e" would
+# abort the handler half-way.
+clean(){
+    echo Done, cleaning.
+    ${SYNC}
+    if [ -f /tmp/music.pid ]; then
+        # music.sh traps TERM and INT without exiting, hence SIGKILL.
+        kill -9 "$(cat /tmp/music.pid)" || true
+        rm -f /tmp/music.pid
+    fi
+}
+
+trap clean EXIT TERM INT
+
+./music.sh &
+echo $! > /tmp/music.pid
+
+su "${USERNAME}" -c ./groomer.sh
+
--- a/fs_filecheck/opt/groomer/music.sh
+++ b/fs_filecheck/opt/groomer/music.sh
@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -e
+#set -x
+
+source ./constraint.sh
+
+killed(){
+    echo 'Music stopped.'
+}
+
+trap killed EXIT TERM INT
+
+# Force output on analog
+amixer cset numid=3 1
+
+files=(${MUSIC}*)
+
+while true; do
+    $TIMIDITY ${files[RANDOM % ${#files[@]}]}
+done
--- a/fs_filecheck/usr/local/bin/pdfid.py
+++ b/fs_filecheck/usr/local/bin/pdfid.py
@ -0,0 +1,930 @@
+#!/usr/bin/env python
+
+__description__ = 'Tool to test a PDF file'
+__author__ = 'Didier Stevens'
+__version__ = '0.2.1'
+__date__ = '2014/10/18'
+
+"""
+
+Tool to test a PDF file
+
+Source code put in public domain by Didier Stevens, no Copyright
+https://DidierStevens.com
+Use at your own risk
+
+History:
+  2009/03/27: start
+  2009/03/28: scan option
+  2009/03/29: V0.0.2: xml output
+  2009/03/31: V0.0.3: /ObjStm suggested by Dion
+  2009/04/02: V0.0.4: added ErrorMessage
+  2009/04/20: V0.0.5: added Dates
+  2009/04/21: V0.0.6: added entropy
+  2009/04/22: added disarm
+  2009/04/29: finished disarm
+  2009/05/13: V0.0.7: added cPDFEOF
+  2009/07/24: V0.0.8: added /AcroForm and /RichMedia, simplified %PDF header regex, extra date format (without TZ)
+  2009/07/25: added input redirection, option --force
+  2009/10/13: V0.0.9: added detection for CVE-2009-3459; added /RichMedia to disarm
+  2010/01/11: V0.0.10: relaxed %PDF header checking
+  2010/04/28: V0.0.11: added /Launch
+  2010/09/21: V0.0.12: fixed cntCharsAfterLastEOF bug; fix by Russell Holloway
+  2011/12/29: updated for Python 3, added keyword /EmbeddedFile
+  2012/03/03: added PDFiD2JSON; coded by Brandon Dixon
+  2013/02/10: V0.1.0: added http/https support; added support for ZIP file with password 'infected'
+  2013/03/11: V0.1.1: fixes for Python 3
+  2013/03/13: V0.1.2: Added error handling for files; added /XFA
+  2013/11/01: V0.2.0: Added @file & plugins
+  2013/11/02: continue
+  2013/11/04: added options -c, -m, -v
+  2013/11/06: added option -S
+  2013/11/08: continue
+  2013/11/09: added option -o
+  2013/11/15: refactoring
+  2014/09/30: added CSV header
+  2014/10/16: V0.2.1: added output when plugin & file not pdf
+  2014/10/18: some fixes for Python 3
+
+Todo:
+  - update XML example (entropy, EOF)
+  - code review, cleanup
+"""
+
+import optparse
+import os
+import re
+import xml.dom.minidom
+import traceback
+import math
+import operator
+import os.path
+import sys
+import json
+import zipfile
+import collections
+import glob
+try:
+    import urllib2
+    urllib23 = urllib2
+except:
+    import urllib.request
+    urllib23 = urllib.request
+
+#Convert 2 Bytes If Python 3
+def C2BIP3(string):
+    if sys.version_info[0] > 2:
+        return bytes([ord(x) for x in string])
+    else:
+        return string
+
+class cBinaryFile:
+    """Byte-oriented reader with a push-back buffer.
+
+    The input is selected from `file`: '' reads sys.stdin, an http:// or
+    https:// URL is opened with urlopen, a name ending in .zip is opened as
+    a ZIP archive (first member, password 'infected') and anything else is
+    opened as a regular file in binary mode.
+    """
+    def __init__(self, file):
+        """Open the input; on failure print the error and call sys.exit()."""
+        self.file = file
+        if file == '':
+            self.infile = sys.stdin
+        elif file.lower().startswith('http://') or file.lower().startswith('https://'):
+            try:
+                # The timeout argument is only passed from Python 2.6 on.
+                if sys.hexversion >= 0x020601F0:
+                    self.infile = urllib23.urlopen(file, timeout=5)
+                else:
+                    self.infile = urllib23.urlopen(file)
+            except urllib23.HTTPError:
+                print('Error accessing URL %s' % file)
+                print(sys.exc_info()[1])
+                sys.exit()
+        elif file.lower().endswith('.zip'):
+            try:
+                self.zipfile = zipfile.ZipFile(file, 'r')
+                # Only the first archive member is read.
+                self.infile = self.zipfile.open(self.zipfile.infolist()[0], 'r', C2BIP3('infected'))
+            except:
+                print('Error opening file %s' % file)
+                print(sys.exc_info()[1])
+                sys.exit()
+        else:
+            try:
+                self.infile = open(file, 'rb')
+            except:
+                print('Error opening file %s' % file)
+                print(sys.exc_info()[1])
+                sys.exit()
+        # Values pushed back with unget()/ungets(), served before the file.
+        self.ungetted = []
+
+    def byte(self):
+        """Return the next byte as an int, or None (closing the input) at
+        end of input. Pushed-back values are served first, last in first out."""
+        if len(self.ungetted) != 0:
+            return self.ungetted.pop()
+        inbyte = self.infile.read(1)
+        if not inbyte or inbyte == '':
+            self.infile.close()
+            return None
+        return ord(inbyte)
+
+    def bytes(self, size):
+        """Return a list of up to `size` byte values (ints).
+
+        NOTE(review): pushed-back values are taken from the front of the
+        buffer here, while byte() pops from the end - confirm the callers
+        never mix both on the same pushed-back data.
+        """
+        if size <= len(self.ungetted):
+            result = self.ungetted[0:size]
+            del self.ungetted[0:size]
+            return result
+        inbytes = self.infile.read(size - len(self.ungetted))
+        # NOTE(review): this comparison only matches a str; a bytes object
+        # (binary read on Python 3) never equals '', so the input is
+        # presumably left open at end of input there.
+        if inbytes == '':
+            self.infile.close()
+        # A str yields characters (converted with ord); otherwise the items
+        # are used as they are.
+        if type(inbytes) == type(''):
+            result = self.ungetted + [ord(b) for b in inbytes]
+        else:
+            result = self.ungetted + [b for b in inbytes]
+        self.ungetted = []
+        return result
+
+    def unget(self, byte):
+        """Push a single byte value back onto the buffer."""
+        self.ungetted.append(byte)
+
+    def ungets(self, bytes):
+        """Push a list of byte values back; `bytes` is reversed in place, so
+        that byte() returns the values in their original order."""
+        bytes.reverse()
+        self.ungetted.extend(bytes)
+
+class cPDFDate:
+    """Incremental recogniser for date strings of the form
+    D: + 14 digits, optionally followed by +, - or Z and dd'dd.
+
+    Feed one character at a time to parse(); it returns the recognised date
+    string when one is complete and None otherwise.
+
+    States: 0 = idle, 1 = 'D' seen, 2 = collecting the 14 digits,
+    3 = collecting the timezone offset.
+    """
+    def __init__(self):
+        self.state = 0
+
+    def parse(self, char):
+        """Consume `char`; return a completed date string or None."""
+        # A 'D' restarts the recogniser whatever the current state is.
+        if char == 'D':
+            self.state = 1
+            return None
+        elif self.state == 1:
+            if char == ':':
+                self.state = 2
+                self.digits1 = ''
+            else:
+                self.state = 0
+            return None
+        elif self.state == 2:
+            if len(self.digits1) < 14:
+                # Still collecting the 14 digits; anything else aborts.
+                if char >= '0' and char <= '9':
+                    self.digits1 += char
+                    return None
+                else:
+                    self.state = 0
+                    return None
+            # 14 digits collected: look at the character that follows them.
+            elif char == '+' or char == '-' or char == 'Z':
+                self.state = 3
+                self.digits2 = ''
+                self.TZ = char
+                return None
+            elif char == '"':
+                self.state = 0
+                self.date = 'D:' + self.digits1
+                return self.date
+            # Any other non-digit ends a date without timezone.
+            elif char < '0' or char > '9':
+                self.state = 0
+                self.date = 'D:' + self.digits1
+                return self.date
+            # A 15th digit: not a date.
+            else:
+                self.state = 0
+                return None
+        elif self.state == 3:
+            # digits2 grows to 5 characters: two digits, "'", two digits.
+            if len(self.digits2) < 2:
+                if char >= '0' and char <= '9':
+                    self.digits2 += char
+                    return None
+                else:
+                    self.state = 0
+                    return None
+            elif len(self.digits2) == 2:
+                if char == "'":
+                    self.digits2 += char
+                    return None
+                else:
+                    self.state = 0
+                    return None
+            elif len(self.digits2) < 5:
+                if char >= '0' and char <= '9':
+                    self.digits2 += char
+                    if len(self.digits2) == 5:
+                        self.state = 0
+                        self.date = 'D:' + self.digits1 + self.TZ + self.digits2
+                        return self.date
+                    else:
+                        return None
+                else:
+                    self.state = 0
+                    return None
+
+def fEntropy(countByte, countTotal):
+    x = float(countByte) / countTotal
+    if x > 0:
+        return - x * math.log(x, 2)
+    else:
+        return 0.0
+
+class cEntropy:
+    def __init__(self):
+        self.allBucket = [0 for i in range(0, 256)]
+        self.streamBucket = [0 for i in range(0, 256)]
+
+    def add(self, byte, insideStream):
+        self.allBucket[byte] += 1
+        if insideStream:
+            self.streamBucket[byte] += 1
+
+    def removeInsideStream(self, byte):
+        if self.streamBucket[byte] > 0:
+            self.streamBucket[byte] -= 1
+
+    def calc(self):
+        self.nonStreamBucket = map(operator.sub, self.allBucket, self.streamBucket)
+        allCount = sum(self.allBucket)
+        streamCount = sum(self.streamBucket)
+        nonStreamCount = sum(self.nonStreamBucket)
+        return (allCount, sum(map(lambda x: fEntropy(x, allCount), self.allBucket)), streamCount, sum(map(lambda x: fEntropy(x, streamCount), self.streamBucket)), nonStreamCount, sum(map(lambda x: fEntropy(x, nonStreamCount), self.nonStreamBucket)))
+
+class cPDFEOF:
+    """Incremental counter of %%EOF markers.
+
+    cntEOFs counts the markers that are followed by a newline, carriage
+    return, space or tab. cntCharsAfterLastEOF counts the characters seen
+    after the most recent marker; the attribute only exists once a first
+    marker has been found.
+    """
+    def __init__(self):
+        # Part of the marker matched so far.
+        self.token = ''
+        self.cntEOFs = 0
+
+    def parse(self, char):
+        """Consume one character of the input."""
+        if self.cntEOFs > 0:
+            self.cntCharsAfterLastEOF += 1
+        if self.token == '' and char == '%':
+            self.token += char
+            return
+        elif self.token == '%' and char == '%':
+            self.token += char
+            return
+        elif self.token == '%%' and char == 'E':
+            self.token += char
+            return
+        elif self.token == '%%E' and char == 'O':
+            self.token += char
+            return
+        elif self.token == '%%EO' and char == 'F':
+            self.token += char
+            return
+        elif self.token == '%%EOF' and (char == '\n' or char == '\r' or char == ' ' or char == '\t'):
+            # Marker complete: the terminating character is not counted.
+            self.cntEOFs += 1
+            self.cntCharsAfterLastEOF = 0
+            if char == '\n':
+                self.token = ''
+            else:
+                # Kept so that a following '\n' (CR LF ending) can be
+                # recognised by the branch below.
+                self.token += char
+            return
+        elif self.token == '%%EOF\r':
+            # The '\n' of a CR LF line ending is not counted either.
+            if char == '\n':
+                self.cntCharsAfterLastEOF = 0
+            self.token = ''
+        else:
+            self.token = ''
+
+def FindPDFHeaderRelaxed(oBinaryFile):
+    bytes = oBinaryFile.bytes(1024)
+    index = ''.join([chr(byte) for byte in bytes]).find('%PDF')
+    if index == -1:
+        oBinaryFile.ungets(bytes)
+        return ([], None)
+    for endHeader in range(index + 4, index + 4 + 10):
+        if bytes[endHeader] == 10 or bytes[endHeader] == 13:
+            break
+    oBinaryFile.ungets(bytes[endHeader:])
+    return (bytes[0:endHeader], ''.join([chr(byte) for byte in bytes[index:endHeader]]))
+
+def Hexcode2String(char):
+    if type(char) == int:
+        return '#%02x' % char
+    else:
+        return char
+
+def SwapCase(char):
+    if type(char) == int:
+        return ord(chr(char).swapcase())
+    else:
+        return char.swapcase()
+
+def HexcodeName2String(hexcodeName):
+    """Join the items of `hexcodeName` into one string, rendering every int
+    item through Hexcode2String."""
+    return ''.join(map(Hexcode2String, hexcodeName))
+
+def SwapName(wordExact):
+    """Apply SwapCase to every item of `wordExact`. The result is whatever
+    map() returns: a list on Python 2, a one-shot iterator on Python 3."""
+    return map(SwapCase, wordExact)
+
+def UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut):
+    """Account for one completed word and return the state for the next one.
+
+    word         -- the word, without its leading slash
+    wordExact    -- the items of the word as read (rendered with
+                    HexcodeName2String when written to fOut)
+    slash        -- '/' when the word is a name, '' otherwise
+    words        -- dict: keyword -> [count, hexcode count], updated in place
+    hexcode      -- true when the hexcode counter has to be incremented too
+    allNames     -- when true, names missing from `words` are added to it
+    lastName     -- last name seen so far
+    insideStream -- true between the words 'stream' and 'endstream'
+    oEntropy     -- cEntropy instance or None
+    fOut         -- output file for the disarmed copy, or None
+
+    Returns ('', [], False, lastName, insideStream); the first three values
+    are presumably the reset word, wordExact and hexcode of the caller
+    (the caller is not visible here - TODO confirm).
+    """
+    if word != '':
+        if slash + word in words:
+            words[slash + word][0] += 1
+            if hexcode:
+                words[slash + word][1] += 1
+        elif slash == '/' and allNames:
+            words[slash + word] = [1, 0]
+            if hexcode:
+                words[slash + word][1] += 1
+        if slash == '/':
+            lastName = slash + word
+        if slash == '':
+            if word == 'stream':
+                insideStream = True
+            if word == 'endstream':
+                # The characters of the 'endstream' keyword itself are taken
+                # back out of the stream histogram.
+                if insideStream == True and oEntropy != None:
+                    for char in 'endstream':
+                        oEntropy.removeInsideStream(ord(char))
+                insideStream = False
+        if fOut != None:
+            # Disarm: the listed names are written with their letter case
+            # swapped; every other word is written as it was read.
+            if slash == '/' and '/' + word in ('/JS', '/JavaScript', '/AA', '/OpenAction', '/JBIG2Decode', '/RichMedia', '/Launch'):
+                wordExactSwapped = HexcodeName2String(SwapName(wordExact))
+                fOut.write(C2BIP3(wordExactSwapped))
+                print('/%s -> /%s' % (HexcodeName2String(wordExact), wordExactSwapped))
+            else:
+                fOut.write(C2BIP3(HexcodeName2String(wordExact)))
+    return ('', [], False, lastName, insideStream)
+
+class cCVE_2009_3459:
+    def __init__(self):
+        self.count = 0
+
+    def Check(self, lastName, word):
+        if (lastName == '/Colors' and word.isdigit() and int(word) > 2^24): # decided to alert when the number of colors is expressed with more than 3 bytes
+            self.count += 1
+
+def XMLAddAttribute(xmlDoc, name, value=None):
+    att = xmlDoc.createAttribute(name)
+    xmlDoc.documentElement.setAttributeNode(att)
+    if value != None:
+        att.nodeValue = value
+
+def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
+    """Example of XML output:
+    <PDFiD ErrorOccured="False" ErrorMessage="" Filename="test.pdf" Header="%PDF-1.1" IsPDF="True" Version="0.0.4" Entropy="4.28">
+            <Keywords>
+                    <Keyword Count="7" HexcodeCount="0" Name="obj"/>
+                    <Keyword Count="7" HexcodeCount="0" Name="endobj"/>
+                    <Keyword Count="1" HexcodeCount="0" Name="stream"/>
+                    <Keyword Count="1" HexcodeCount="0" Name="endstream"/>
+                    <Keyword Count="1" HexcodeCount="0" Name="xref"/>
+                    <Keyword Count="1" HexcodeCount="0" Name="trailer"/>
+                    <Keyword Count="1" HexcodeCount="0" Name="startxref"/>
+                    <Keyword Count="1" HexcodeCount="0" Name="/Page"/>
+                    <Keyword Count="0" HexcodeCount="0" Name="/Encrypt"/>
+                    <Keyword Count="1" HexcodeCount="0" Name="/JS"/>
+                    <Keyword Count="1" HexcodeCount="0" Name="/JavaScript"/>
+                    <Keyword Count="0" HexcodeCount="0" Name="/AA"/>
+                    <Keyword Count="1" HexcodeCount="0" Name="/OpenAction"/>
+                    <Keyword Count="0" HexcodeCount="0" Name="/JBIG2Decode"/>
+            </Keywords>
+            <Dates>
+                    <Date Value="D:20090128132916+01'00" Name="/ModDate"/>
+            </Dates>
+    </PDFiD>
+    """
+
+    word = ''
+    wordExact = []
+    hexcode = False
+    lastName = ''
+    insideStream = False
+    keywords = ('obj',
+                'endobj',
+                'stream',
+                'endstream',
+                'xref',
+                'trailer',
+                'startxref',
+                '/Page',
+                '/Encrypt',
+                '/ObjStm',
+                '/JS',
+                '/JavaScript',
+                '/AA',
+                '/OpenAction',
+                '/AcroForm',
+                '/JBIG2Decode',
+                '/RichMedia',
+                '/Launch',
+                '/EmbeddedFile',
+                '/XFA',
+               )
+    words = {}
+    dates = []
+    for keyword in keywords:
+        words[keyword] = [0, 0]
+    slash = ''
+    xmlDoc = xml.dom.minidom.getDOMImplementation().createDocument(None, 'PDFiD', None)
+    XMLAddAttribute(xmlDoc, 'Version', __version__)
+    XMLAddAttribute(xmlDoc, 'Filename', file)
+    attErrorOccured = XMLAddAttribute(xmlDoc, 'ErrorOccured', 'False')
+    attErrorMessage = XMLAddAttribute(xmlDoc, 'ErrorMessage', '')
+
+    oPDFDate = None
+    oEntropy = None
+    oPDFEOF = None
+    oCVE_2009_3459 = cCVE_2009_3459()
+    try:
+        attIsPDF = xmlDoc.createAttribute('IsPDF')
+        xmlDoc.documentElement.setAttributeNode(attIsPDF)
+        oBinaryFile = cBinaryFile(file)
+        if extraData:
+            oPDFDate = cPDFDate()
+            oEntropy = cEntropy()
+            oPDFEOF = cPDFEOF()
+        (bytesHeader, pdfHeader) = FindPDFHeaderRelaxed(oBinaryFile)
+        if disarm:
+            (pathfile, extension) = os.path.splitext(file)
+            fOut = open(pathfile + '.disarmed' + extension, 'wb')
+            for byteHeader in bytesHeader:
+                fOut.write(C2BIP3(chr(byteHeader)))
+        else:
+            fOut = None
+        if oEntropy != None:
+            for byteHeader in bytesHeader:
+                oEntropy.add(byteHeader, insideStream)
+        if pdfHeader == None and not force:
+            attIsPDF.nodeValue = 'False'
+            return xmlDoc
+        else:
+            if pdfHeader == None:
+                attIsPDF.nodeValue = 'False'
+                pdfHeader = ''
+            else:
+                attIsPDF.nodeValue = 'True'
+            att = xmlDoc.createAttribute('Header')
+            att.nodeValue = repr(pdfHeader[0:10]).strip("'")
+            xmlDoc.documentElement.setAttributeNode(att)
+        byte = oBinaryFile.byte()
+        while byte != None:
+            char = chr(byte)
+            charUpper = char.upper()
+            if charUpper >= 'A' and charUpper <= 'Z' or charUpper >= '0' and charUpper <= '9':
+                word += char
+                wordExact.append(char)
+            elif slash == '/' and char == '#':
+                d1 = oBinaryFile.byte()
+                if d1 != None:
+                    d2 = oBinaryFile.byte()
+                    if d2 != None and (chr(d1) >= '0' and chr(d1) <= '9' or chr(d1).upper() >= 'A' and chr(d1).upper() <= 'F') and (chr(d2) >= '0' and chr(d2) <= '9' or chr(d2).upper() >= 'A' and chr(d2).upper() <= 'F'):
+                        word += chr(int(chr(d1) + chr(d2), 16))
+                        wordExact.append(int(chr(d1) + chr(d2), 16))
+                        hexcode = True
+                        if oEntropy != None:
+                            oEntropy.add(d1, insideStream)
+                            oEntropy.add(d2, insideStream)
+                        if oPDFEOF != None:
+                            oPDFEOF.parse(d1)
+                            oPDFEOF.parse(d2)
+                    else:
+                        oBinaryFile.unget(d2)
+                        oBinaryFile.unget(d1)
+                        (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
+                        if disarm:
+                            fOut.write(C2BIP3(char))
+                else:
+                    oBinaryFile.unget(d1)
+                    (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
+                    if disarm:
+                        fOut.write(C2BIP3(char))
+            else:
+                oCVE_2009_3459.Check(lastName, word)
+
+                (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
+                if char == '/':
+                    slash = '/'
+                else:
+                    slash = ''
+                if disarm:
+                    fOut.write(C2BIP3(char))
+
+            if oPDFDate != None and oPDFDate.parse(char) != None:
+                dates.append([oPDFDate.date, lastName])
+
+            if oEntropy != None:
+                oEntropy.add(byte, insideStream)
+
+            if oPDFEOF != None:
+                oPDFEOF.parse(char)
+
+            byte = oBinaryFile.byte()
+        (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
+
+        # check to see if file ended with %%EOF.  If so, we can reset charsAfterLastEOF and add one to EOF count.  This is never performed in
+        # the parse function because it never gets called due to hitting the end of file.
+        if byte == None and oPDFEOF != None:
+            if oPDFEOF.token == '%%EOF':
+                oPDFEOF.cntEOFs += 1
+                oPDFEOF.cntCharsAfterLastEOF = 0
+                oPDFEOF.token = ''
+
+    except SystemExit:
+        sys.exit()
+    except:
+        attErrorOccured.nodeValue = 'True'
+        attErrorMessage.nodeValue = traceback.format_exc()
+
+    if disarm:
+        fOut.close()
+
+    attEntropyAll = xmlDoc.createAttribute('TotalEntropy')
+    xmlDoc.documentElement.setAttributeNode(attEntropyAll)
+    attCountAll = xmlDoc.createAttribute('TotalCount')
+    xmlDoc.documentElement.setAttributeNode(attCountAll)
+    attEntropyStream = xmlDoc.createAttribute('StreamEntropy')
+    xmlDoc.documentElement.setAttributeNode(attEntropyStream)
+    attCountStream = xmlDoc.createAttribute('StreamCount')
+    xmlDoc.documentElement.setAttributeNode(attCountStream)
+    attEntropyNonStream = xmlDoc.createAttribute('NonStreamEntropy')
+    xmlDoc.documentElement.setAttributeNode(attEntropyNonStream)
+    attCountNonStream = xmlDoc.createAttribute('NonStreamCount')
+    xmlDoc.documentElement.setAttributeNode(attCountNonStream)
+    if oEntropy != None:
+        (countAll, entropyAll , countStream, entropyStream, countNonStream, entropyNonStream) = oEntropy.calc()
+        attEntropyAll.nodeValue = '%f' % entropyAll
+        attCountAll.nodeValue = '%d' % countAll
+        attEntropyStream.nodeValue = '%f' % entropyStream
+        attCountStream.nodeValue = '%d' % countStream
+        attEntropyNonStream.nodeValue = '%f' % entropyNonStream
+        attCountNonStream.nodeValue = '%d' % countNonStream
+    else:
+        attEntropyAll.nodeValue = ''
+        attCountAll.nodeValue = ''
+        attEntropyStream.nodeValue = ''
+        attCountStream.nodeValue = ''
+        attEntropyNonStream.nodeValue = ''
+        attCountNonStream.nodeValue = ''
+    attCountEOF = xmlDoc.createAttribute('CountEOF')
+    xmlDoc.documentElement.setAttributeNode(attCountEOF)
+    attCountCharsAfterLastEOF = xmlDoc.createAttribute('CountCharsAfterLastEOF')
+    xmlDoc.documentElement.setAttributeNode(attCountCharsAfterLastEOF)
+    if oPDFEOF != None:
+        attCountEOF.nodeValue = '%d' % oPDFEOF.cntEOFs
+        attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF
+    else:
+        attCountEOF.nodeValue = ''
+        attCountCharsAfterLastEOF.nodeValue = ''
+
+    eleKeywords = xmlDoc.createElement('Keywords')
+    xmlDoc.documentElement.appendChild(eleKeywords)
+    for keyword in keywords:
+        eleKeyword = xmlDoc.createElement('Keyword')
+        eleKeywords.appendChild(eleKeyword)
+        att = xmlDoc.createAttribute('Name')
+        att.nodeValue = keyword
+        eleKeyword.setAttributeNode(att)
+        att = xmlDoc.createAttribute('Count')
+        att.nodeValue = str(words[keyword][0])
+        eleKeyword.setAttributeNode(att)
+        att = xmlDoc.createAttribute('HexcodeCount')
+        att.nodeValue = str(words[keyword][1])
+        eleKeyword.setAttributeNode(att)
+    eleKeyword = xmlDoc.createElement('Keyword')
+    eleKeywords.appendChild(eleKeyword)
+    att = xmlDoc.createAttribute('Name')
+    att.nodeValue = '/Colors > 2^24'
+    eleKeyword.setAttributeNode(att)
+    att = xmlDoc.createAttribute('Count')
+    att.nodeValue = str(oCVE_2009_3459.count)
+    eleKeyword.setAttributeNode(att)
+    att = xmlDoc.createAttribute('HexcodeCount')
+    att.nodeValue = str(0)
+    eleKeyword.setAttributeNode(att)
+    if allNames:
+        keys = sorted(words.keys())
+        for word in keys:
+            if not word in keywords:
+                eleKeyword = xmlDoc.createElement('Keyword')
+                eleKeywords.appendChild(eleKeyword)
+                att = xmlDoc.createAttribute('Name')
+                att.nodeValue = word
+                eleKeyword.setAttributeNode(att)
+                att = xmlDoc.createAttribute('Count')
+                att.nodeValue = str(words[word][0])
+                eleKeyword.setAttributeNode(att)
+                att = xmlDoc.createAttribute('HexcodeCount')
+                att.nodeValue = str(words[word][1])
+                eleKeyword.setAttributeNode(att)
+    eleDates = xmlDoc.createElement('Dates')
+    xmlDoc.documentElement.appendChild(eleDates)
+    dates.sort(key=lambda x: x[0])
+    for date in dates:
+        eleDate = xmlDoc.createElement('Date')
+        eleDates.appendChild(eleDate)
+        att = xmlDoc.createAttribute('Value')
+        att.nodeValue = date[0]
+        eleDate.setAttributeNode(att)
+        att = xmlDoc.createAttribute('Name')
+        att.nodeValue = date[1]
+        eleDate.setAttributeNode(att)
+    return xmlDoc
+
+def PDFiD2String(xmlDoc, force):
+    result = 'PDFiD %s %s\n' % (xmlDoc.documentElement.getAttribute('Version'), xmlDoc.documentElement.getAttribute('Filename'))
+    if xmlDoc.documentElement.getAttribute('ErrorOccured') == 'True':
+        return result + '***Error occured***\n%s\n' % xmlDoc.documentElement.getAttribute('ErrorMessage')
+    if not force and xmlDoc.documentElement.getAttribute('IsPDF') == 'False':
+        return result + ' Not a PDF document\n'
+    result += ' PDF Header: %s\n' % xmlDoc.documentElement.getAttribute('Header')
+    for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes:
+        result += ' %-16s %7d' % (node.getAttribute('Name'), int(node.getAttribute('Count')))
+        if int(node.getAttribute('HexcodeCount')) > 0:
+            result += '(%d)' % int(node.getAttribute('HexcodeCount'))
+        result += '\n'
+    if xmlDoc.documentElement.getAttribute('CountEOF') != '':
+        result += ' %-16s %7d\n' % ('%%EOF', int(xmlDoc.documentElement.getAttribute('CountEOF')))
+    if xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF') != '':
+        result += ' %-16s %7d\n' % ('After last %%EOF', int(xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF')))
+    for node in xmlDoc.documentElement.getElementsByTagName('Dates')[0].childNodes:
+        result += ' %-23s %s\n' % (node.getAttribute('Value'), node.getAttribute('Name'))
+    if xmlDoc.documentElement.getAttribute('TotalEntropy') != '':
+        result += ' Total entropy:           %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('TotalEntropy'), xmlDoc.documentElement.getAttribute('TotalCount'))
+    if xmlDoc.documentElement.getAttribute('StreamEntropy') != '':
+        result += ' Entropy inside streams:  %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('StreamEntropy'), xmlDoc.documentElement.getAttribute('StreamCount'))
+    if xmlDoc.documentElement.getAttribute('NonStreamEntropy') != '':
+        result += ' Entropy outside streams: %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('NonStreamEntropy'), xmlDoc.documentElement.getAttribute('NonStreamCount'))
+    return result
+
+class cCount():
+    def __init__(self, count, hexcode):
+        self.count = count
+        self.hexcode = hexcode
+
+class cPDFiD():
+    def __init__(self, xmlDoc, force):
+        self.version = xmlDoc.documentElement.getAttribute('Version')
+        self.filename = xmlDoc.documentElement.getAttribute('Filename')
+        self.errorOccured = xmlDoc.documentElement.getAttribute('ErrorOccured') == 'True'
+        self.errorMessage = xmlDoc.documentElement.getAttribute('ErrorMessage')
+        self.isPDF = None
+        if self.errorOccured:
+            return
+        self.isPDF = xmlDoc.documentElement.getAttribute('IsPDF') == 'True'
+        if not force and not self.isPDF:
+            return
+        self.header = xmlDoc.documentElement.getAttribute('Header')
+        self.keywords = {}
+        for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes:
+            self.keywords[node.getAttribute('Name')] = cCount(int(node.getAttribute('Count')), int(node.getAttribute('HexcodeCount')))
+        self.obj = self.keywords['obj']
+        self.endobj = self.keywords['endobj']
+        self.stream = self.keywords['stream']
+        self.endstream = self.keywords['endstream']
+        self.xref = self.keywords['xref']
+        self.trailer = self.keywords['trailer']
+        self.startxref = self.keywords['startxref']
+        self.page = self.keywords['/Page']
+        self.encrypt = self.keywords['/Encrypt']
+        self.objstm = self.keywords['/ObjStm']
+        self.js = self.keywords['/JS']
+        self.javascript = self.keywords['/JavaScript']
+        self.aa = self.keywords['/AA']
+        self.openaction = self.keywords['/OpenAction']
+        self.acroform = self.keywords['/AcroForm']
+        self.jbig2decode = self.keywords['/JBIG2Decode']
+        self.richmedia = self.keywords['/RichMedia']
+        self.launch = self.keywords['/Launch']
+        self.embeddedfile = self.keywords['/EmbeddedFile']
+        self.xfa = self.keywords['/XFA']
+        self.colors_gt_2_24 = self.keywords['/Colors > 2^24']
+
+def Print(lines, options):
+    print(lines)
+    filename = None
+    if options.scan:
+        filename = 'PDFiD.log'
+    if options.output != '':
+        filename = options.output
+    if filename:
+        logfile = open(filename, 'a')
+        logfile.write(lines + '\n')
+        logfile.close()
+
+def Quote(value, separator, quote):
+    if isinstance(value, str):
+        if separator in value:
+            return quote + value + quote
+    return value
+
+def MakeCSVLine(fields, separator=';', quote='"'):
+    formatstring = separator.join([field[0] for field in fields])
+    strings = [Quote(field[1], separator, quote) for field in fields]
+    return formatstring % tuple(strings)
+
+def ProcessFile(filename, options, plugins):
+    xmlDoc = PDFiD(filename, options.all, options.extra, options.disarm, options.force)
+    if plugins == [] and options.select == '':
+        Print(PDFiD2String(xmlDoc, options.force), options)
+        return
+
+    oPDFiD = cPDFiD(xmlDoc, options.force)
+    if options.select:
+        if options.force or not oPDFiD.errorOccured and oPDFiD.isPDF:
+            pdf = oPDFiD
+            try:
+                selected = eval(options.select)
+            except Exception as e:
+                Print('Error evaluating select expression: %s' % options.select, options)
+                if options.verbose:
+                    raise e
+                return
+            if selected:
+                if options.csv:
+                    Print(filename, options)
+                else:
+                    Print(PDFiD2String(xmlDoc, options.force), options)
+    else:
+        for cPlugin in plugins:
+            if not cPlugin.onlyValidPDF or not oPDFiD.errorOccured and oPDFiD.isPDF:
+                try:
+                    oPlugin = cPlugin(oPDFiD)
+                except Exception as e:
+                    Print('Error instantiating plugin: %s' % cPlugin.name, options)
+                    if options.verbose:
+                        raise e
+                    return
+
+                try:
+                    score = oPlugin.Score()
+                except Exception as e:
+                    Print('Error running plugin: %s' % cPlugin.name, options)
+                    if options.verbose:
+                        raise e
+                    return
+
+                if options.csv:
+                    if score >= options.minimumscore:
+                        Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%.02f', score))), options)
+                else:
+                    if score >= options.minimumscore:
+                        Print(PDFiD2String(xmlDoc, options.force), options)
+                        Print('%s score: %.02f' % (cPlugin.name, score), options)
+            else:
+                if options.csv:
+                    if oPDFiD.errorOccured:
+                        Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Error occured'))), options)
+                    if not oPDFiD.isPDF:
+                        Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Not a PDF document'))), options)
+                else:
+                    Print(PDFiD2String(xmlDoc, options.force), options)
+
+
+def Scan(directory, options, plugins):
+    try:
+        if os.path.isdir(directory):
+            for entry in os.listdir(directory):
+                Scan(os.path.join(directory, entry), options, plugins)
+        else:
+            ProcessFile(directory, options, plugins)
+    except Exception as e:
+#        print directory
+        print(e)
+#        print(sys.exc_info()[2])
+#        print traceback.format_exc()
+
+#function derived from: http://blog.9bplus.com/pdfidpy-output-to-json
+def PDFiD2JSON(xmlDoc, force):
+    #Get Top Layer Data
+    errorOccured = xmlDoc.documentElement.getAttribute('ErrorOccured')
+    errorMessage = xmlDoc.documentElement.getAttribute('ErrorMessage')
+    filename = xmlDoc.documentElement.getAttribute('Filename')
+    header = xmlDoc.documentElement.getAttribute('Header')
+    isPdf = xmlDoc.documentElement.getAttribute('IsPDF')
+    version = xmlDoc.documentElement.getAttribute('Version')
+    entropy = xmlDoc.documentElement.getAttribute('Entropy')
+
+    #extra data
+    countEof = xmlDoc.documentElement.getAttribute('CountEOF')
+    countChatAfterLastEof = xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF')
+    totalEntropy = xmlDoc.documentElement.getAttribute('TotalEntropy')
+    streamEntropy = xmlDoc.documentElement.getAttribute('StreamEntropy')
+    nonStreamEntropy = xmlDoc.documentElement.getAttribute('NonStreamEntropy')
+
+    keywords = []
+    dates = []
+
+    #grab all keywords
+    for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes:
+        name = node.getAttribute('Name')
+        count = int(node.getAttribute('Count'))
+        if int(node.getAttribute('HexcodeCount')) > 0:
+            hexCount = int(node.getAttribute('HexcodeCount'))
+        else:
+            hexCount = 0
+        keyword = { 'count':count, 'hexcodecount':hexCount, 'name':name }
+        keywords.append(keyword)
+
+    #grab all date information
+    for node in xmlDoc.documentElement.getElementsByTagName('Dates')[0].childNodes:
+        name = node.getAttribute('Name')
+        value = node.getAttribute('Value')
+        date = { 'name':name, 'value':value }
+        dates.append(date)
+
+    data = { 'countEof':countEof, 'countChatAfterLastEof':countChatAfterLastEof, 'totalEntropy':totalEntropy, 'streamEntropy':streamEntropy, 'nonStreamEntropy':nonStreamEntropy, 'errorOccured':errorOccured, 'errorMessage':errorMessage, 'filename':filename, 'header':header, 'isPdf':isPdf, 'version':version, 'entropy':entropy, 'keywords': { 'keyword': keywords }, 'dates': { 'date':dates} }
+    complete = [ { 'pdfid' : data} ]
+    result = json.dumps(complete)
+    return result
+
+def File2Strings(filename):
+    try:
+        f = open(filename, 'r')
+    except:
+        return None
+    try:
+        return list(map(lambda line:line.rstrip('\n'), f.readlines()))
+    except:
+        return None
+    finally:
+        f.close()
+
+def ProcessAt(argument):
+    if argument.startswith('@'):
+        strings = File2Strings(argument[1:])
+        if strings == None:
+            raise Exception('Error reading %s' % argument)
+        else:
+            return strings
+    else:
+        return [argument]
+
+def AddPlugin(cClass):
+    global plugins
+
+    plugins.append(cClass)
+
+def ExpandFilenameArguments(filenames):
+    return list(collections.OrderedDict.fromkeys(sum(map(glob.glob, sum(map(ProcessAt, filenames), [])), [])))
+
+# Base class for PDFiD plugins.
+class cPluginParent():
+    # Default for all plugins: ProcessFile instantiates and scores a plugin with
+    # onlyValidPDF set only when the scan had no error and the file is a PDF.
+    onlyValidPDF = True
+
+def LoadPlugins(plugins, verbose):
+    if plugins == '':
+        return
+    scriptPath = os.path.dirname(sys.argv[0])
+    for plugin in sum(map(ProcessAt, plugins.split(',')), []):
+        try:
+            if not plugin.lower().endswith('.py'):
+                plugin += '.py'
+            if os.path.dirname(plugin) == '':
+                if not os.path.exists(plugin):
+                    scriptPlugin = os.path.join(scriptPath, plugin)
+                    if os.path.exists(scriptPlugin):
+                        plugin = scriptPlugin
+            exec(open(plugin, 'r').read())
+        except Exception as e:
+            print('Error loading plugin: %s' % plugin)
+            if verbose:
+                raise e
+
+def PDFiDMain(filenames, options):
+    global plugins
+    plugins = []
+    LoadPlugins(options.plugins, options.verbose)
+
+    if options.csv:
+        if plugins != []:
+            Print(MakeCSVLine((('%s', 'Filename'), ('%s', 'Plugin-name'), ('%s', 'Score'))), options)
+        elif options.select != '':
+            Print('Filename', options)
+
+    for filename in filenames:
+        if options.scan:
+            Scan(filename, options, plugins)
+        else:
+            ProcessFile(filename, options, plugins)
+
+def Main():
+    moredesc = '''
+
+Arguments:
+pdf-file and zip-file can be a single file, several files, and/or @file
+@file: run PDFiD on each file listed in the text file specified
+wildcards are supported
+
+Source code put in the public domain by Didier Stevens, no Copyright
+Use at your own risk
+https://DidierStevens.com'''
+
+    oParser = optparse.OptionParser(usage='usage: %prog [options] [pdf-file|zip-file|url|@file] ...\n' + __description__ + moredesc, version='%prog ' + __version__)
+    oParser.add_option('-s', '--scan', action='store_true', default=False, help='scan the given directory')
+    oParser.add_option('-a', '--all', action='store_true', default=False, help='display all the names')
+    oParser.add_option('-e', '--extra', action='store_true', default=False, help='display extra data, like dates')
+    oParser.add_option('-f', '--force', action='store_true', default=False, help='force the scan of the file, even without proper %PDF header')
+    oParser.add_option('-d', '--disarm', action='store_true', default=False, help='disable JavaScript and auto launch')
+    oParser.add_option('-p', '--plugins', type=str, default='', help='plugins to load (separate plugins with a comma , ; @file supported)')
+    oParser.add_option('-c', '--csv', action='store_true', default=False, help='output csv data when using plugins')
+    oParser.add_option('-m', '--minimumscore', type=float, default=0.0, help='minimum score for plugin results output')
+    oParser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose (will also raise catched exceptions)')
+    oParser.add_option('-S', '--select', type=str, default='', help='selection expression')
+    oParser.add_option('-o', '--output', type=str, default='', help='output to log file')
+    (options, args) = oParser.parse_args()
+
+    if len(args) == 0:
+        if options.disarm:
+            print('Option disarm not supported with stdin')
+            options.disarm = False
+        if options.scan:
+            print('Option scan not supported with stdin')
+            options.scan = False
+        filenames = ['']
+    else:
+        try:
+            filenames = ExpandFilenameArguments(args)
+        except Exception as e:
+            print(e)
+            return
+    PDFiDMain(filenames, options)
+
+# Run the command line interface only when executed as a script, not when imported.
+if __name__ == '__main__':
+    Main()
--- a/fs_filecheck/usr/local/bin/plugin_embeddedfile.py
+++ b/fs_filecheck/usr/local/bin/plugin_embeddedfile.py
@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+#2014/10/13
+
+class cPDFiDEmbeddedFile(cPluginParent):
+#    onlyValidPDF = True
+    name = 'EmbeddedFile plugin'
+
+    def __init__(self, oPDFiD):
+        self.oPDFiD = oPDFiD
+
+    def Score(self):
+        if '/EmbeddedFile' in self.oPDFiD.keywords and self.oPDFiD.keywords['/EmbeddedFile'].count > 0:
+            if self.oPDFiD.keywords['/EmbeddedFile'].hexcode > 0:
+                return 1.0
+            else:
+                return 0.9
+        else:
+            return 0.0
+
+AddPlugin(cPDFiDEmbeddedFile)
--- a/fs_filecheck/usr/local/bin/plugin_list
+++ b/fs_filecheck/usr/local/bin/plugin_list
@ -0,0 +1,3 @@
+plugin_embeddedfile.py
+plugin_nameobfuscation.py
+plugin_triage.py
--- a/fs_filecheck/usr/local/bin/plugin_nameobfuscation.py
+++ b/fs_filecheck/usr/local/bin/plugin_nameobfuscation.py
@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+#2013/11/04
+#2013/11/08
+
+class cPDFiDNameObfuscation(cPluginParent):
+#    onlyValidPDF = True
+    name = 'Name Obfuscation plugin'
+
+    def __init__(self, oPDFiD):
+        self.oPDFiD = oPDFiD
+
+    def Score(self):
+        if sum([oCount.hexcode for oCount in self.oPDFiD.keywords.values()]) > 0:
+            return 1.0
+        else:
+            return 0.0
+
+AddPlugin(cPDFiDNameObfuscation)
--- a/fs_filecheck/usr/local/bin/plugin_triage.py
+++ b/fs_filecheck/usr/local/bin/plugin_triage.py
@ -0,0 +1,22 @@
+#!/usr/bin/env python
+
+#2014/09/30
+
+class cPDFiDTriage(cPluginParent):
+#    onlyValidPDF = True
+    name = 'Triage plugin'
+
+    def __init__(self, oPDFiD):
+        self.oPDFiD = oPDFiD
+
+    def Score(self):
+        for keyword in ('/ObjStm', '/JS', '/JavaScript', '/AA', '/OpenAction', '/AcroForm', '/JBIG2Decode', '/RichMedia', '/Launch', '/EmbeddedFile', '/XFA', '/Colors > 2^24'):
+            if keyword in self.oPDFiD.keywords and self.oPDFiD.keywords[keyword].count > 0:
+                return 1.0
+        if self.oPDFiD.keywords['obj'].count != self.oPDFiD.keywords['endobj'].count:
+            return 1.0
+        if self.oPDFiD.keywords['stream'].count != self.oPDFiD.keywords['endstream'].count:
+            return 1.0
+        return 0.0
+
+AddPlugin(cPDFiDTriage)
--- a/mount_image.sh
+++ b/mount_image.sh
@ -24,14 +24,14 @@ set -e
 set -x

 # If you use a partition...
-#PARTITION_ROOTFS='/dev/mmcblk0p2'
-#PARTITION_BOOT='/dev/mmcblk0p1'
-PARTITION_ROOTFS='/dev/sdd2'
-PARTITION_BOOT='/dev/sdd1'
+PARTITION_ROOTFS='/dev/mmcblk0p2'
+PARTITION_BOOT='/dev/mmcblk0p1'
+#PARTITION_ROOTFS='/dev/sdd2'
+#PARTITION_BOOT='/dev/sdd1'

 # If you use the img
 ##### Debian
-IMAGE='2015-02-16-raspbian-wheezy.img'
+IMAGE='2015-11-06-CIRCLean.img'
 OFFSET_ROOTFS=$((122880 * 512))
 OFFSET_BOOT=$((8192 * 512))
 ##### Arch
--- a/tests/run.exp
+++ b/tests/run.exp
@ -2,7 +2,7 @@

 set timeout -1

-spawn qemu-system-arm -kernel 140801-kernel -cpu arm1176 -m 256 -M versatilepb \
+spawn qemu-system-arm -kernel kernel-qemu -cpu arm1176 -m 256 -M versatilepb \
    -append "root=/dev/sdc2 panic=1 rootfstype=ext4 ro console=ttyAMA0 console=ttyS0" \
    -drive file=[lindex $argv 1],index=0,media=disk \
    -drive file=[lindex $argv 2],index=1,media=disk \
--- a/tests/run.sh
+++ b/tests/run.sh
@ -2,7 +2,7 @@

 # http://pub.phyks.me/respawn/mypersonaldata/public/2014-05-20-11-08-01/

-IMAGE='../2015-02-16-raspbian-wheezy.img'
+IMAGE='../raspbian-wheezy.img'
 OFFSET_ROOTFS=$((122880 * 512))

 IMAGE_VFAT_NORM="testcase.vfat"
				`@ -0,0 +1 @@`
				`SUBSYSTEM=="hidraw", DRIVERS=="usbhid", RUN+="/bin/bash -c 'cd /sys/devices/platform/bcm2708_usb/usb1/1-1 ; for d in $(ls -d 1-1.[2-5]); do if [ $(ls -lR $d \| grep -c usbhid) -gt 0 ] ; then echo 0 > $d/authorized ; fi ; done'"`