diff --git a/README_filecheck.md b/README_filecheck.md new file mode 100644 index 0000000..23b718b --- /dev/null +++ b/README_filecheck.md @@ -0,0 +1,86 @@ +Install Qemu and Expect +============ + +Install the necessary packages: + +``` + sudo apt-get install qemu qemu-user-static expect +``` + +Create a new image from scratch +=============================== + +* Download the most recent Raspbian version: + http://downloads.raspberrypi.org/raspbian_latest + +* Unpack it: + +``` + unzip 2015-05-05-raspbian-wheezy.zip + mv 2015-05-05-raspbian-wheezy.zip raspbian-wheezy.zip +``` + +Prepare the image +================= + +It will be used for the build environment and the final image. + +* [Add empty space to the image](resize_img.md) + +* Chroot in the image + +``` + sudo ./proper_chroot.sh +``` + +* Change your user to root (your global variables may be broken) + +``` + su root +``` + +* The locales may be broken, fix it (remove `en_GB.UTF-8 UTF-8`, set `en_US.UTF-8 UTF-8`): + +``` + dpkg-reconfigure locales +``` + +* In the image, make sure everything is up-to-date, and remove the old packages + +``` + apt-get update + apt-get dist-upgrade + apt-get autoremove + apt-get install p7zip-full python-dev libxml2-dev libxslt1-dev pmount +``` + +* Install python requirements + +``` + pip install lxml + pip install oletools olefile + pip install officedissector + pip install git+https://github.com/Rafiot/python-magic.git@travis + pip install git+https://github.com/CIRCL/PyCIRCLean.git +``` + +* Create the user and mtab for a RO filesystem + +``` + useradd -m kitten + chown -R kitten:kitten /home/kitten + ln -s /proc/mounts /etc/mtab +``` + +* Copy the files + +``` + sudo ./copy_to_final.sh /mnt/arm_rPi/ +``` + +* Enable rc.local + +``` + systemctl enable rc-local.service +``` + diff --git a/copy_to_final.sh b/copy_to_final.sh index 8963c7a..d7ee10b 100755 --- a/copy_to_final.sh +++ b/copy_to_final.sh @@ -23,7 +23,7 @@ fi #cp deb/*.deb ${CHROOT_PATH}/ # prepare fs archive 
-tar -cvpzf backup.tar.gz -C fs/ . +tar -cvpzf backup.tar.gz -C fs_filecheck/ . tar -xzf backup.tar.gz -C ${CHROOT_PATH}/ chown root:root ${CHROOT_PATH}/etc/sudoers if [ -f deb/led ]; then diff --git a/fs/etc/udev/rules.d/50-blockhid.rules b/fs/etc/udev/rules.d/50-blockhid.rules new file mode 100644 index 0000000..0a36ea0 --- /dev/null +++ b/fs/etc/udev/rules.d/50-blockhid.rules @@ -0,0 +1 @@ +SUBSYSTEM=="hidraw", DRIVERS=="usbhid", RUN+="/bin/bash -c 'cd /sys/devices/platform/bcm2708_usb/usb1/1-1 ; for d in $(ls -d 1-1.[2-5]); do if [ $(ls -lR $d | grep -c usbhid) -gt 0 ] ; then echo 0 > $d/authorized ; fi ; done'" diff --git a/fs_filecheck/etc/fstab b/fs_filecheck/etc/fstab new file mode 100644 index 0000000..0aae1ee --- /dev/null +++ b/fs_filecheck/etc/fstab @@ -0,0 +1,6 @@ +proc /proc proc defaults 0 0 +/dev/mmcblk0p1 /boot vfat ro,defaults 0 0 +/dev/mmcblk0p2 / ext4 ro,defaults,noatime 0 0 +tmpfs /tmp tmpfs rw,size=64M,noexec,nodev,nosuid,mode=1777 0 0 +tmpfs /media tmpfs rw,size=64M,noexec,nodev,nosuid,mode=1777 0 0 +# a swapfile is not a swap partition, so no using swapon|off from here on, use dphys-swapfile swap[on|off] for that diff --git a/fs_filecheck/etc/group b/fs_filecheck/etc/group new file mode 100644 index 0000000..6a7844f --- /dev/null +++ b/fs_filecheck/etc/group @@ -0,0 +1,54 @@ +root:x:0: +daemon:x:1: +bin:x:2: +sys:x:3: +adm:x:4:pi +tty:x:5: +disk:x:6: +lp:x:7: +mail:x:8: +news:x:9: +uucp:x:10: +man:x:12: +proxy:x:13: +kmem:x:15: +dialout:x:20:pi +fax:x:21: +voice:x:22: +cdrom:x:24:pi +floppy:x:25: +tape:x:26: +sudo:x:27:pi +audio:x:29:pi +dip:x:30: +www-data:x:33: +backup:x:34: +operator:x:37: +list:x:38: +irc:x:39: +src:x:40: +gnats:x:41: +shadow:x:42: +utmp:x:43: +video:x:44:pi +sasl:x:45: +plugdev:x:46:pi,kitten +staff:x:50: +games:x:60:pi +users:x:100:pi +nogroup:x:65534: +libuuid:x:101: +crontab:x:102: +pi:x:1000: +ssh:x:103: +ntp:x:104: +netdev:x:105:pi +input:x:999:pi +messagebus:x:106: +lpadmin:x:107: +fuse:x:108: +lightdm:x:109: 
+indiecity:x:1001:root +spi:x:1002:pi +gpio:x:1003:pi +kitten:x:1004: diff --git a/fs_filecheck/etc/pam.d/su b/fs_filecheck/etc/pam.d/su new file mode 100644 index 0000000..e222071 --- /dev/null +++ b/fs_filecheck/etc/pam.d/su @@ -0,0 +1,61 @@ +# +# The PAM configuration file for the Shadow `su' service +# + +# This allows root to su without passwords (normal operation) +auth sufficient pam_rootok.so + +# Uncomment this to force users to be a member of group root +# before they can use `su'. You can also add "group=foo" +# to the end of this line if you want to use a group other +# than the default "root" (but this may have side effect of +# denying "root" user, unless she's a member of "foo" or explicitly +# permitted earlier by e.g. "sufficient pam_rootok.so"). +# (Replaces the `SU_WHEEL_ONLY' option from login.defs) +# auth required pam_wheel.so + +# Uncomment this if you want wheel members to be able to +# su without a password. +# auth sufficient pam_wheel.so trust + +# Uncomment this if you want members of a specific group to not +# be allowed to use su at all. +# auth required pam_wheel.so deny group=nosu + +# Uncomment and edit /etc/security/time.conf if you need to set +# time restrainst on su usage. +# (Replaces the `PORTTIME_CHECKS_ENAB' option from login.defs +# as well as /etc/porttime) +# account requisite pam_time.so + +# This module parses environment configuration file(s) +# and also allows you to use an extended config +# file /etc/security/pam_env.conf. +# +# parsing /etc/environment needs "readenv=1" +session required pam_env.so readenv=1 +# locale variables are also kept into /etc/default/locale in etch +# reading this file *in addition to /etc/environment* does not hurt +session required pam_env.so readenv=1 envfile=/etc/default/locale + +# Defines the MAIL environment variable +# However, userdel also needs MAIL_DIR and MAIL_FILE variables +# in /etc/login.defs to make sure that removing a user +# also removes the user's mail spool file. 
+# See comments in /etc/login.defs +# +# "nopen" stands to avoid reporting new mail when su'ing to another user +session optional pam_mail.so nopen + +# Sets up user limits according to /etc/security/limits.conf +# (Replaces the use of /etc/limits in old login) +#session required pam_limits.so + +# The standard Unix authentication modules, used with +# NIS (man nsswitch) as well as normal /etc/passwd and +# /etc/shadow entries. +@include common-auth +@include common-account +@include common-session + + diff --git a/fs_filecheck/etc/passwd b/fs_filecheck/etc/passwd new file mode 100644 index 0000000..e8fa943 --- /dev/null +++ b/fs_filecheck/etc/passwd @@ -0,0 +1,30 @@ +root:x:0:0:root:/root:/bin/bash +daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin +bin:x:2:2:bin:/bin:/usr/sbin/nologin +sys:x:3:3:sys:/dev:/usr/sbin/nologin +sync:x:4:65534:sync:/bin:/bin/sync +games:x:5:60:games:/usr/games:/usr/sbin/nologin +man:x:6:12:man:/var/cache/man:/usr/sbin/nologin +lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin +mail:x:8:8:mail:/var/mail:/usr/sbin/nologin +news:x:9:9:news:/var/spool/news:/usr/sbin/nologin +uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin +proxy:x:13:13:proxy:/bin:/usr/sbin/nologin +www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin +backup:x:34:34:backup:/var/backups:/usr/sbin/nologin +list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin +irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin +gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin +nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin +systemd-timesync:x:100:103:systemd Time Synchronization,,,:/run/systemd:/bin/false +systemd-network:x:101:104:systemd Network Management,,,:/run/systemd/netif:/bin/false +systemd-resolve:x:102:105:systemd Resolver,,,:/run/systemd/resolve:/bin/false +systemd-bus-proxy:x:103:106:systemd Bus Proxy,,,:/run/systemd:/bin/false +pi:x:1000:1000:,,,:/home/pi:/bin/false +sshd:x:104:65534::/var/run/sshd:/usr/sbin/nologin 
+messagebus:x:105:110::/var/run/dbus:/bin/false +avahi:x:106:111:Avahi mDNS daemon,,,:/var/run/avahi-daemon:/bin/false +ntp:x:107:112::/home/ntp:/bin/false +statd:x:108:65534::/var/lib/nfs:/bin/false +lightdm:x:109:114:Light Display Manager:/var/lib/lightdm:/bin/false +kitten:x:1001:1004::/home/kitten:/bin/bash diff --git a/fs_filecheck/etc/pmount.allow b/fs_filecheck/etc/pmount.allow new file mode 100644 index 0000000..e9bb419 --- /dev/null +++ b/fs_filecheck/etc/pmount.allow @@ -0,0 +1,5 @@ +# /etc/pmount.allow +# pmount will allow users to additionally mount all devices that are +# listed here. +/dev/sdb1 +/dev/sda* diff --git a/fs_filecheck/etc/profile.d/raspi-config.sh b/fs_filecheck/etc/profile.d/raspi-config.sh new file mode 100644 index 0000000..0206061 --- /dev/null +++ b/fs_filecheck/etc/profile.d/raspi-config.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# Part of raspi-config http://github.com/asb/raspi-config +# +# See LICENSE file for copyright and license details + +# Should be installed to /etc/profile.d/raspi-config.sh to force raspi-config +# to run at initial login + +# You may also want to set automatic login in /etc/inittab on tty1 by adding a +# line such as: +# 1:2345:respawn:/bin/login -f root tty1 /dev/tty1 2>&1 # RPICFG_TO_DISABLE + +if [ $(id -u) -ne 0 ]; then + printf "\nNOTICE: the software on this Raspberry Pi has not been fully configured. Please run 'sudo raspi-config'\n\n" +else + # Disable raspi-config at the first run. + # raspi-config + exec login -f pi +fi diff --git a/fs_filecheck/etc/rc.local b/fs_filecheck/etc/rc.local new file mode 100755 index 0000000..6c2e691 --- /dev/null +++ b/fs_filecheck/etc/rc.local @@ -0,0 +1,36 @@ +#!/bin/sh -e +# +# rc.local +# +# This script is executed at the end of each multiuser runlevel. +# Make sure that the script will "exit 0" on success or any other +# value on error. +# +# In order to enable or disable this script just change the execution +# bits. +# +# By default this script does nothing. 
+ +clean(){ + echo 'Rc Local done, quit.' + /sbin/shutdown -P -h now +} + +# Print the IP address +_IP=$(hostname -I) || true +if [ "$_IP" ]; then + printf "My IP address is %s\n" "$_IP" +fi + +if [ -e /dev/sda ]; then + if [ -e /dev/sdb ]; then + # avoid possible misuse + /sbin/ifconfig eth0 down + trap clean EXIT TERM INT + cd /opt/groomer + /usr/sbin/led & + ./init.sh + fi +fi + +exit 0 diff --git a/fs_filecheck/etc/security/limits.conf b/fs_filecheck/etc/security/limits.conf new file mode 100644 index 0000000..e8be267 --- /dev/null +++ b/fs_filecheck/etc/security/limits.conf @@ -0,0 +1 @@ +kitten hard priority -20 diff --git a/fs_filecheck/etc/sudoers b/fs_filecheck/etc/sudoers new file mode 100644 index 0000000..7bdb828 --- /dev/null +++ b/fs_filecheck/etc/sudoers @@ -0,0 +1,28 @@ +# +# This file MUST be edited with the 'visudo' command as root. +# +# Please consider adding local content in /etc/sudoers.d/ instead of +# directly modifying this file. +# +# See the man page for details on how to write a sudoers file. 
+# +Defaults env_reset +Defaults mail_badpass +Defaults secure_path="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + +# Host alias specification + +# User alias specification + +# Cmnd alias specification + +# User privilege specification +#root ALL=(ALL:ALL) ALL + +# Allow members of group sudo to execute any command +#%sudo ALL=(ALL:ALL) ALL + +# See sudoers(5) for more information on "#include" directives: + +#includedir /etc/sudoers.d +#pi ALL=(ALL) NOPASSWD: ALL diff --git a/fs_filecheck/etc/systemd/system/rc-local.service b/fs_filecheck/etc/systemd/system/rc-local.service new file mode 100644 index 0000000..7b0d9a7 --- /dev/null +++ b/fs_filecheck/etc/systemd/system/rc-local.service @@ -0,0 +1,12 @@ +[Unit] +Description=/etc/rc.local Compatibility + +[Service] +Type=oneshot +ExecStart=/etc/rc.local +TimeoutSec=0 +StandardInput=tty +RemainAfterExit=yes + +[Install] +WantedBy=multi-user.target diff --git a/fs_filecheck/etc/udev/rules.d/50-blockhid.rules b/fs_filecheck/etc/udev/rules.d/50-blockhid.rules new file mode 100644 index 0000000..0a36ea0 --- /dev/null +++ b/fs_filecheck/etc/udev/rules.d/50-blockhid.rules @@ -0,0 +1 @@ +SUBSYSTEM=="hidraw", DRIVERS=="usbhid", RUN+="/bin/bash -c 'cd /sys/devices/platform/bcm2708_usb/usb1/1-1 ; for d in $(ls -d 1-1.[2-5]); do if [ $(ls -lR $d | grep -c usbhid) -gt 0 ] ; then echo 0 > $d/authorized ; fi ; done'" diff --git a/fs_filecheck/etc/udev/rules.d/90-qemu.rules b/fs_filecheck/etc/udev/rules.d/90-qemu.rules new file mode 100644 index 0000000..07dfc21 --- /dev/null +++ b/fs_filecheck/etc/udev/rules.d/90-qemu.rules @@ -0,0 +1,2 @@ +KERNEL=="sdc", SYMLINK+="mmcblk0" +KERNEL=="sdc?", SYMLINK+="mmcblk0p%n", diff --git a/fs_filecheck/opt/groomer/constraint.sh b/fs_filecheck/opt/groomer/constraint.sh new file mode 100644 index 0000000..fcf899e --- /dev/null +++ b/fs_filecheck/opt/groomer/constraint.sh @@ -0,0 +1,23 @@ +DEV_SRC='/dev/sda' +DEV_DST='sdb1' + +# User allowed to do the following commands without 
#!/bin/bash
#
# groomer.sh - copy the content of every partition of the source device to
# the destination device, passing the files through filecheck.py.
#
# Must run as an unprivileged user (uid >= 1000). All device names, mount
# point names and command paths (DEV_SRC, DEV_DST, SRC, DST, TEMP, ZIPTEMP,
# LOGS, SYNC, MOUNT, PMOUNT, PUMOUNT, ID) come from ./constraint.sh.

set -e
set -x

source ./constraint.sh
if ! [ "${ID}" -ge "1000" ]; then
    echo "This script cannot run as root."
    exit
fi

# Exit handler: flush buffers, unmount both devices and remove the temporary
# directories on the destination.
clean(){
    # Keep the status the script was terminating with, and disarm the traps
    # so that the final 'exit' does not run this handler a second time.
    status=${?}
    trap - EXIT TERM INT

    echo Cleaning.
    # 'set -e' is still active inside a trap handler: every step is made
    # best-effort so that one failure (e.g. unmounting a source that was
    # never mounted) cannot skip the remaining cleanup steps.
    ${SYNC} || true

    # Cleanup source
    ${PUMOUNT} ${SRC} || true

    # Cleanup destination
    rm -rf ${TEMP}
    rm -rf ${ZIPTEMP}
    ${PUMOUNT} ${DST} || true

    exit ${status}
}

trap clean EXIT TERM INT

# Do we have a source device
if [ ! -b ${DEV_SRC} ]; then
    echo "Source device (${DEV_SRC}) does not exists."
    exit
fi
# Find the partition names on the source device
DEV_PARTITIONS=`ls "${DEV_SRC}"* | grep "${DEV_SRC}[1-9][0-6]*" || true`
if [ -z "${DEV_PARTITIONS}" ]; then
    echo "${DEV_SRC} does not have any partitions."
    exit
fi

# Do we have a destination device
if [ ! -b "/dev/${DEV_DST}" ]; then
    echo "Destination device (/dev/${DEV_DST}) does not exists."
    exit
fi

# mount and prepare destination device
if ${MOUNT}|grep ${DST}; then
    ${PUMOUNT} ${DST} || true
fi
# uid= only works on a vfat FS. What should we do if we get an ext* FS ?
# The mount is tested directly: with 'set -e' a separate '[ ${?} -ne 0 ]'
# check is never reached when the command fails.
if ! ${PMOUNT} -w ${DEV_DST} ${DST}; then
    echo "Unable to mount /dev/${DEV_DST} on /media/${DST}"
    # A failed mount already terminated the script with a non-zero status
    # before this change; keep reporting it as a failure.
    exit 1
fi
echo "Target USB device (/dev/${DEV_DST}) mounted at /media/${DST}"
rm -rf "/media/${DST}/FROM_PARTITION_"*

# prepare temp dirs and make sure they are empty
mkdir -p "${TEMP}"
mkdir -p "${ZIPTEMP}"
mkdir -p "${LOGS}"

rm -rf "${TEMP}/"*
rm -rf "${ZIPTEMP}/"*
rm -rf "${LOGS}/"*

# Groom da kitteh!

# Find the FS types
# lsblk -n -o name,fstype,mountpoint,label,uuid -r

PARTCOUNT=1
for partition in ${DEV_PARTITIONS}
do
    # Processing a partition
    echo "Processing partition: ${partition}"
    if [ `${MOUNT} | grep -c ${SRC}` -ne 0 ]; then
        ${PUMOUNT} ${SRC}
    fi

    # First pass, read-write: neutralise any autorun.inf by renaming it.
    # NOTE(review): a failure of this read-write mount still aborts the whole
    # run because of 'set -e' (unchanged) - confirm this is wanted for
    # write-protected source media.
    ${PMOUNT} -w ${partition} ${SRC}
    ls "/media/${SRC}" | grep -i autorun.inf | xargs -I {} mv "/media/${SRC}"/{} "/media/${SRC}"/DANGEROUS_{}_DANGEROUS || true
    ${PUMOUNT} ${SRC}

    # Second pass, read-only: the actual copy. A partition that cannot be
    # mounted is reported and skipped instead of silently ending the script.
    if ! ${PMOUNT} -r ${partition} ${SRC}; then
        echo "Unable to mount ${partition} on /media/${SRC}"
    else
        echo "${partition} mounted at /media/${SRC}"

        # Print the filenames on the current partition in a logfile
        find "/media/${SRC}" -fls "${LOGS}/Content_partition_${PARTCOUNT}.txt"

        # create a directory on ${DST} named FROM_PARTITION_${PARTCOUNT}
        target_dir="/media/${DST}/FROM_PARTITION_${PARTCOUNT}"
        echo "copying to: ${target_dir}"
        mkdir -p "${target_dir}"
        LOGFILE="${LOGS}/processing.txt"

        echo "==== Starting processing of /media/${SRC} to ${target_dir}. ====" >> ${LOGFILE}
        filecheck.py --source /media/${SRC} --destination ${target_dir} || true
        echo "==== Done with /media/${SRC} to ${target_dir}. ====" >> ${LOGFILE}

        ls -lR "${target_dir}"
    fi
    PARTCOUNT=$((PARTCOUNT + 1))
done

# The cleanup is automatically done in the function clean called when
# the program quits
+} + +trap killed EXIT TERM INT + +# Force output on analog +amixer cset numid=3 1 + +files=(${MUSIC}*) + +while true; do + $TIMIDITY ${files[RANDOM % ${#files[@]}]} +done diff --git a/fs_filecheck/usr/local/bin/pdfid.py b/fs_filecheck/usr/local/bin/pdfid.py new file mode 100644 index 0000000..95c5b76 --- /dev/null +++ b/fs_filecheck/usr/local/bin/pdfid.py @@ -0,0 +1,930 @@ +#!/usr/bin/env python + +__description__ = 'Tool to test a PDF file' +__author__ = 'Didier Stevens' +__version__ = '0.2.1' +__date__ = '2014/10/18' + +""" + +Tool to test a PDF file + +Source code put in public domain by Didier Stevens, no Copyright +https://DidierStevens.com +Use at your own risk + +History: + 2009/03/27: start + 2009/03/28: scan option + 2009/03/29: V0.0.2: xml output + 2009/03/31: V0.0.3: /ObjStm suggested by Dion + 2009/04/02: V0.0.4: added ErrorMessage + 2009/04/20: V0.0.5: added Dates + 2009/04/21: V0.0.6: added entropy + 2009/04/22: added disarm + 2009/04/29: finished disarm + 2009/05/13: V0.0.7: added cPDFEOF + 2009/07/24: V0.0.8: added /AcroForm and /RichMedia, simplified %PDF header regex, extra date format (without TZ) + 2009/07/25: added input redirection, option --force + 2009/10/13: V0.0.9: added detection for CVE-2009-3459; added /RichMedia to disarm + 2010/01/11: V0.0.10: relaxed %PDF header checking + 2010/04/28: V0.0.11: added /Launch + 2010/09/21: V0.0.12: fixed cntCharsAfterLastEOF bug; fix by Russell Holloway + 2011/12/29: updated for Python 3, added keyword /EmbeddedFile + 2012/03/03: added PDFiD2JSON; coded by Brandon Dixon + 2013/02/10: V0.1.0: added http/https support; added support for ZIP file with password 'infected' + 2013/03/11: V0.1.1: fixes for Python 3 + 2013/03/13: V0.1.2: Added error handling for files; added /XFA + 2013/11/01: V0.2.0: Added @file & plugins + 2013/11/02: continue + 2013/11/04: added options -c, -m, -v + 2013/11/06: added option -S + 2013/11/08: continue + 2013/11/09: added option -o + 2013/11/15: refactoring + 2014/09/30: 
def C2BIP3(string):
    """Convert 2 Bytes If Python 3.

    On Python 3 every character of *string* is turned into the byte with the
    same ordinal value and the resulting bytes object is returned. On
    Python 2 the argument is returned unchanged.
    """
    if sys.version_info[0] <= 2:
        return string
    return bytes(map(ord, string))
def fEntropy(countByte, countTotal):
    """Return one byte value's contribution to the Shannon entropy, in bits.

    countByte is the number of occurrences of the byte value and countTotal
    the size of the sample it was counted in. The result is -p * log2(p)
    with p = countByte / countTotal, and 0.0 when p is not positive or when
    the sample is empty.
    """
    # An empty sample (for instance a document without any stream bytes)
    # carries no entropy; without this guard the division below raises
    # ZeroDivisionError.
    if countTotal == 0:
        return 0.0
    x = float(countByte) / countTotal
    if x > 0:
        return - x * math.log(x, 2)
    return 0.0


class cEntropy:
    """Byte histograms used to compute entropy inside and outside streams."""

    def __init__(self):
        # One counter per possible byte value.
        self.allBucket = [0] * 256
        self.streamBucket = [0] * 256

    def add(self, byte, insideStream):
        """Count *byte* (0..255), also as stream content if insideStream."""
        self.allBucket[byte] += 1
        if insideStream:
            self.streamBucket[byte] += 1

    def removeInsideStream(self, byte):
        """Undo one stream count for *byte*; the counter never drops below 0."""
        if self.streamBucket[byte] > 0:
            self.streamBucket[byte] -= 1

    def calc(self):
        """Return the byte counts and entropies of the three samples.

        The result is the tuple (allCount, allEntropy, streamCount,
        streamEntropy, nonStreamCount, nonStreamEntropy).
        """
        # Materialise the histogram: on Python 3 map() returns a one-shot
        # iterator, which sum() below would exhaust, leaving nothing for the
        # entropy computation (non-stream entropy was then always 0).
        self.nonStreamBucket = list(map(operator.sub, self.allBucket, self.streamBucket))
        allCount = sum(self.allBucket)
        streamCount = sum(self.streamBucket)
        nonStreamCount = sum(self.nonStreamBucket)
        return (
            allCount,
            sum(fEntropy(count, allCount) for count in self.allBucket),
            streamCount,
            sum(fEntropy(count, streamCount) for count in self.streamBucket),
            nonStreamCount,
            sum(fEntropy(count, nonStreamCount) for count in self.nonStreamBucket),
        )


class cPDFEOF:
    """Character-by-character scanner counting %%EOF markers.

    cntEOFs is the number of markers seen so far and cntCharsAfterLastEOF
    the number of characters fed to parse() since the last one.
    """

    def __init__(self):
        self.token = ''
        self.cntEOFs = 0
        # Defined from the start so that reading it is safe even when the
        # input never contains a %%EOF marker.
        self.cntCharsAfterLastEOF = 0

    def parse(self, char):
        """Feed the next character of the document to the scanner."""
        if self.cntEOFs > 0:
            self.cntCharsAfterLastEOF += 1
        if self.token == '' and char == '%':
            self.token += char
            return
        elif self.token == '%' and char == '%':
            self.token += char
            return
        elif self.token == '%%' and char == 'E':
            self.token += char
            return
        elif self.token == '%%E' and char == 'O':
            self.token += char
            return
        elif self.token == '%%EO' and char == 'F':
            self.token += char
            return
        elif self.token == '%%EOF' and (char == '\n' or char == '\r' or char == ' ' or char == '\t'):
            # Marker followed by whitespace: count it and restart the
            # trailing-character counter.
            self.cntEOFs += 1
            self.cntCharsAfterLastEOF = 0
            if char == '\n':
                self.token = ''
            else:
                self.token += char
            return
        elif self.token == '%%EOF\r':
            # The '\n' of a '\r\n' line ending still belongs to the marker.
            if char == '\n':
                self.cntCharsAfterLastEOF = 0
            self.token = ''
        else:
            self.token = ''


def FindPDFHeaderRelaxed(oBinaryFile):
    """Look for a %PDF header in the first 1024 bytes of oBinaryFile.

    oBinaryFile must provide bytes(size), returning a list of integer byte
    values, and ungets(list) to push bytes back.

    Returns (bytesUpToEndOfHeader, headerString). The header ends at the
    first LF/CR found within 10 bytes after '%PDF', at the end of the data
    read, or after 9 further bytes otherwise; the bytes behind it are pushed
    back. When no header is found, everything is pushed back and ([], None)
    is returned.
    """
    headBytes = oBinaryFile.bytes(1024)
    index = ''.join([chr(byte) for byte in headBytes]).find('%PDF')
    if index == -1:
        oBinaryFile.ungets(headBytes)
        return ([], None)
    for endHeader in range(index + 4, index + 4 + 10):
        # Stop at the end of the data as well: a short file holding only
        # the header must not raise IndexError.
        if endHeader >= len(headBytes) or headBytes[endHeader] == 10 or headBytes[endHeader] == 13:
            break
    oBinaryFile.ungets(headBytes[endHeader:])
    return (headBytes[0:endHeader], ''.join([chr(byte) for byte in headBytes[index:endHeader]]))
class cCVE_2009_3459:
    """Counter for suspicious /Colors values (indicator of CVE-2009-3459)."""

    def __init__(self):
        self.count = 0

    def Check(self, lastName, word):
        """Increment count when *word* is a /Colors value above 2**24.

        lastName is the most recent PDF name seen by the parser and word the
        token that follows it.
        """
        if lastName != '/Colors' or not word.isdigit():
            return
        try:
            colors = int(word)
        except ValueError:
            # str.isdigit() accepts some non-ASCII digit characters that
            # int() rejects; such a token is not a colour count.
            return
        # decided to alert when the number of colors is expressed with more
        # than 3 bytes. '**' is exponentiation; the former '2^24' was a
        # bitwise XOR evaluating to 26.
        if colors > 2 ** 24:
            self.count += 1


def XMLAddAttribute(xmlDoc, name, value=None):
    """Add attribute *name* to the root element of xmlDoc and return it.

    When value is not None it becomes the attribute's value. The attribute
    node is returned so that callers can update its nodeValue later on.
    """
    att = xmlDoc.createAttribute(name)
    xmlDoc.documentElement.setAttributeNode(att)
    if value != None:
        att.nodeValue = value
    return att
'endobj', + 'stream', + 'endstream', + 'xref', + 'trailer', + 'startxref', + '/Page', + '/Encrypt', + '/ObjStm', + '/JS', + '/JavaScript', + '/AA', + '/OpenAction', + '/AcroForm', + '/JBIG2Decode', + '/RichMedia', + '/Launch', + '/EmbeddedFile', + '/XFA', + ) + words = {} + dates = [] + for keyword in keywords: + words[keyword] = [0, 0] + slash = '' + xmlDoc = xml.dom.minidom.getDOMImplementation().createDocument(None, 'PDFiD', None) + XMLAddAttribute(xmlDoc, 'Version', __version__) + XMLAddAttribute(xmlDoc, 'Filename', file) + attErrorOccured = XMLAddAttribute(xmlDoc, 'ErrorOccured', 'False') + attErrorMessage = XMLAddAttribute(xmlDoc, 'ErrorMessage', '') + + oPDFDate = None + oEntropy = None + oPDFEOF = None + oCVE_2009_3459 = cCVE_2009_3459() + try: + attIsPDF = xmlDoc.createAttribute('IsPDF') + xmlDoc.documentElement.setAttributeNode(attIsPDF) + oBinaryFile = cBinaryFile(file) + if extraData: + oPDFDate = cPDFDate() + oEntropy = cEntropy() + oPDFEOF = cPDFEOF() + (bytesHeader, pdfHeader) = FindPDFHeaderRelaxed(oBinaryFile) + if disarm: + (pathfile, extension) = os.path.splitext(file) + fOut = open(pathfile + '.disarmed' + extension, 'wb') + for byteHeader in bytesHeader: + fOut.write(C2BIP3(chr(byteHeader))) + else: + fOut = None + if oEntropy != None: + for byteHeader in bytesHeader: + oEntropy.add(byteHeader, insideStream) + if pdfHeader == None and not force: + attIsPDF.nodeValue = 'False' + return xmlDoc + else: + if pdfHeader == None: + attIsPDF.nodeValue = 'False' + pdfHeader = '' + else: + attIsPDF.nodeValue = 'True' + att = xmlDoc.createAttribute('Header') + att.nodeValue = repr(pdfHeader[0:10]).strip("'") + xmlDoc.documentElement.setAttributeNode(att) + byte = oBinaryFile.byte() + while byte != None: + char = chr(byte) + charUpper = char.upper() + if charUpper >= 'A' and charUpper <= 'Z' or charUpper >= '0' and charUpper <= '9': + word += char + wordExact.append(char) + elif slash == '/' and char == '#': + d1 = oBinaryFile.byte() + if d1 != None: + d2 
= oBinaryFile.byte() + if d2 != None and (chr(d1) >= '0' and chr(d1) <= '9' or chr(d1).upper() >= 'A' and chr(d1).upper() <= 'F') and (chr(d2) >= '0' and chr(d2) <= '9' or chr(d2).upper() >= 'A' and chr(d2).upper() <= 'F'): + word += chr(int(chr(d1) + chr(d2), 16)) + wordExact.append(int(chr(d1) + chr(d2), 16)) + hexcode = True + if oEntropy != None: + oEntropy.add(d1, insideStream) + oEntropy.add(d2, insideStream) + if oPDFEOF != None: + oPDFEOF.parse(d1) + oPDFEOF.parse(d2) + else: + oBinaryFile.unget(d2) + oBinaryFile.unget(d1) + (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut) + if disarm: + fOut.write(C2BIP3(char)) + else: + oBinaryFile.unget(d1) + (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut) + if disarm: + fOut.write(C2BIP3(char)) + else: + oCVE_2009_3459.Check(lastName, word) + + (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut) + if char == '/': + slash = '/' + else: + slash = '' + if disarm: + fOut.write(C2BIP3(char)) + + if oPDFDate != None and oPDFDate.parse(char) != None: + dates.append([oPDFDate.date, lastName]) + + if oEntropy != None: + oEntropy.add(byte, insideStream) + + if oPDFEOF != None: + oPDFEOF.parse(char) + + byte = oBinaryFile.byte() + (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut) + + # check to see if file ended with %%EOF. If so, we can reset charsAfterLastEOF and add one to EOF count. This is never performed in + # the parse function because it never gets called due to hitting the end of file. 
+ if byte == None and oPDFEOF != None: + if oPDFEOF.token == '%%EOF': + oPDFEOF.cntEOFs += 1 + oPDFEOF.cntCharsAfterLastEOF = 0 + oPDFEOF.token = '' + + except SystemExit: + sys.exit() + except: + attErrorOccured.nodeValue = 'True' + attErrorMessage.nodeValue = traceback.format_exc() + + if disarm: + fOut.close() + + attEntropyAll = xmlDoc.createAttribute('TotalEntropy') + xmlDoc.documentElement.setAttributeNode(attEntropyAll) + attCountAll = xmlDoc.createAttribute('TotalCount') + xmlDoc.documentElement.setAttributeNode(attCountAll) + attEntropyStream = xmlDoc.createAttribute('StreamEntropy') + xmlDoc.documentElement.setAttributeNode(attEntropyStream) + attCountStream = xmlDoc.createAttribute('StreamCount') + xmlDoc.documentElement.setAttributeNode(attCountStream) + attEntropyNonStream = xmlDoc.createAttribute('NonStreamEntropy') + xmlDoc.documentElement.setAttributeNode(attEntropyNonStream) + attCountNonStream = xmlDoc.createAttribute('NonStreamCount') + xmlDoc.documentElement.setAttributeNode(attCountNonStream) + if oEntropy != None: + (countAll, entropyAll , countStream, entropyStream, countNonStream, entropyNonStream) = oEntropy.calc() + attEntropyAll.nodeValue = '%f' % entropyAll + attCountAll.nodeValue = '%d' % countAll + attEntropyStream.nodeValue = '%f' % entropyStream + attCountStream.nodeValue = '%d' % countStream + attEntropyNonStream.nodeValue = '%f' % entropyNonStream + attCountNonStream.nodeValue = '%d' % countNonStream + else: + attEntropyAll.nodeValue = '' + attCountAll.nodeValue = '' + attEntropyStream.nodeValue = '' + attCountStream.nodeValue = '' + attEntropyNonStream.nodeValue = '' + attCountNonStream.nodeValue = '' + attCountEOF = xmlDoc.createAttribute('CountEOF') + xmlDoc.documentElement.setAttributeNode(attCountEOF) + attCountCharsAfterLastEOF = xmlDoc.createAttribute('CountCharsAfterLastEOF') + xmlDoc.documentElement.setAttributeNode(attCountCharsAfterLastEOF) + if oPDFEOF != None: + attCountEOF.nodeValue = '%d' % oPDFEOF.cntEOFs + 
attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF + else: + attCountEOF.nodeValue = '' + attCountCharsAfterLastEOF.nodeValue = '' + + eleKeywords = xmlDoc.createElement('Keywords') + xmlDoc.documentElement.appendChild(eleKeywords) + for keyword in keywords: + eleKeyword = xmlDoc.createElement('Keyword') + eleKeywords.appendChild(eleKeyword) + att = xmlDoc.createAttribute('Name') + att.nodeValue = keyword + eleKeyword.setAttributeNode(att) + att = xmlDoc.createAttribute('Count') + att.nodeValue = str(words[keyword][0]) + eleKeyword.setAttributeNode(att) + att = xmlDoc.createAttribute('HexcodeCount') + att.nodeValue = str(words[keyword][1]) + eleKeyword.setAttributeNode(att) + eleKeyword = xmlDoc.createElement('Keyword') + eleKeywords.appendChild(eleKeyword) + att = xmlDoc.createAttribute('Name') + att.nodeValue = '/Colors > 2^24' + eleKeyword.setAttributeNode(att) + att = xmlDoc.createAttribute('Count') + att.nodeValue = str(oCVE_2009_3459.count) + eleKeyword.setAttributeNode(att) + att = xmlDoc.createAttribute('HexcodeCount') + att.nodeValue = str(0) + eleKeyword.setAttributeNode(att) + if allNames: + keys = sorted(words.keys()) + for word in keys: + if not word in keywords: + eleKeyword = xmlDoc.createElement('Keyword') + eleKeywords.appendChild(eleKeyword) + att = xmlDoc.createAttribute('Name') + att.nodeValue = word + eleKeyword.setAttributeNode(att) + att = xmlDoc.createAttribute('Count') + att.nodeValue = str(words[word][0]) + eleKeyword.setAttributeNode(att) + att = xmlDoc.createAttribute('HexcodeCount') + att.nodeValue = str(words[word][1]) + eleKeyword.setAttributeNode(att) + eleDates = xmlDoc.createElement('Dates') + xmlDoc.documentElement.appendChild(eleDates) + dates.sort(key=lambda x: x[0]) + for date in dates: + eleDate = xmlDoc.createElement('Date') + eleDates.appendChild(eleDate) + att = xmlDoc.createAttribute('Value') + att.nodeValue = date[0] + eleDate.setAttributeNode(att) + att = xmlDoc.createAttribute('Name') + 
att.nodeValue = date[1] + eleDate.setAttributeNode(att) + return xmlDoc + +def PDFiD2String(xmlDoc, force): + result = 'PDFiD %s %s\n' % (xmlDoc.documentElement.getAttribute('Version'), xmlDoc.documentElement.getAttribute('Filename')) + if xmlDoc.documentElement.getAttribute('ErrorOccured') == 'True': + return result + '***Error occured***\n%s\n' % xmlDoc.documentElement.getAttribute('ErrorMessage') + if not force and xmlDoc.documentElement.getAttribute('IsPDF') == 'False': + return result + ' Not a PDF document\n' + result += ' PDF Header: %s\n' % xmlDoc.documentElement.getAttribute('Header') + for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes: + result += ' %-16s %7d' % (node.getAttribute('Name'), int(node.getAttribute('Count'))) + if int(node.getAttribute('HexcodeCount')) > 0: + result += '(%d)' % int(node.getAttribute('HexcodeCount')) + result += '\n' + if xmlDoc.documentElement.getAttribute('CountEOF') != '': + result += ' %-16s %7d\n' % ('%%EOF', int(xmlDoc.documentElement.getAttribute('CountEOF'))) + if xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF') != '': + result += ' %-16s %7d\n' % ('After last %%EOF', int(xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF'))) + for node in xmlDoc.documentElement.getElementsByTagName('Dates')[0].childNodes: + result += ' %-23s %s\n' % (node.getAttribute('Value'), node.getAttribute('Name')) + if xmlDoc.documentElement.getAttribute('TotalEntropy') != '': + result += ' Total entropy: %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('TotalEntropy'), xmlDoc.documentElement.getAttribute('TotalCount')) + if xmlDoc.documentElement.getAttribute('StreamEntropy') != '': + result += ' Entropy inside streams: %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('StreamEntropy'), xmlDoc.documentElement.getAttribute('StreamCount')) + if xmlDoc.documentElement.getAttribute('NonStreamEntropy') != '': + result += ' Entropy outside streams: %s (%10s bytes)\n' % 
(xmlDoc.documentElement.getAttribute('NonStreamEntropy'), xmlDoc.documentElement.getAttribute('NonStreamCount')) + return result + +class cCount(): + def __init__(self, count, hexcode): + self.count = count + self.hexcode = hexcode + +class cPDFiD(): + def __init__(self, xmlDoc, force): + self.version = xmlDoc.documentElement.getAttribute('Version') + self.filename = xmlDoc.documentElement.getAttribute('Filename') + self.errorOccured = xmlDoc.documentElement.getAttribute('ErrorOccured') == 'True' + self.errorMessage = xmlDoc.documentElement.getAttribute('ErrorMessage') + self.isPDF = None + if self.errorOccured: + return + self.isPDF = xmlDoc.documentElement.getAttribute('IsPDF') == 'True' + if not force and not self.isPDF: + return + self.header = xmlDoc.documentElement.getAttribute('Header') + self.keywords = {} + for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes: + self.keywords[node.getAttribute('Name')] = cCount(int(node.getAttribute('Count')), int(node.getAttribute('HexcodeCount'))) + self.obj = self.keywords['obj'] + self.endobj = self.keywords['endobj'] + self.stream = self.keywords['stream'] + self.endstream = self.keywords['endstream'] + self.xref = self.keywords['xref'] + self.trailer = self.keywords['trailer'] + self.startxref = self.keywords['startxref'] + self.page = self.keywords['/Page'] + self.encrypt = self.keywords['/Encrypt'] + self.objstm = self.keywords['/ObjStm'] + self.js = self.keywords['/JS'] + self.javascript = self.keywords['/JavaScript'] + self.aa = self.keywords['/AA'] + self.openaction = self.keywords['/OpenAction'] + self.acroform = self.keywords['/AcroForm'] + self.jbig2decode = self.keywords['/JBIG2Decode'] + self.richmedia = self.keywords['/RichMedia'] + self.launch = self.keywords['/Launch'] + self.embeddedfile = self.keywords['/EmbeddedFile'] + self.xfa = self.keywords['/XFA'] + self.colors_gt_2_24 = self.keywords['/Colors > 2^24'] + +def Print(lines, options): + print(lines) + filename = None + 
if options.scan: + filename = 'PDFiD.log' + if options.output != '': + filename = options.output + if filename: + logfile = open(filename, 'a') + logfile.write(lines + '\n') + logfile.close() + +def Quote(value, separator, quote): + if isinstance(value, str): + if separator in value: + return quote + value + quote + return value + +def MakeCSVLine(fields, separator=';', quote='"'): + formatstring = separator.join([field[0] for field in fields]) + strings = [Quote(field[1], separator, quote) for field in fields] + return formatstring % tuple(strings) + +def ProcessFile(filename, options, plugins): + xmlDoc = PDFiD(filename, options.all, options.extra, options.disarm, options.force) + if plugins == [] and options.select == '': + Print(PDFiD2String(xmlDoc, options.force), options) + return + + oPDFiD = cPDFiD(xmlDoc, options.force) + if options.select: + if options.force or not oPDFiD.errorOccured and oPDFiD.isPDF: + pdf = oPDFiD + try: + selected = eval(options.select) + except Exception as e: + Print('Error evaluating select expression: %s' % options.select, options) + if options.verbose: + raise e + return + if selected: + if options.csv: + Print(filename, options) + else: + Print(PDFiD2String(xmlDoc, options.force), options) + else: + for cPlugin in plugins: + if not cPlugin.onlyValidPDF or not oPDFiD.errorOccured and oPDFiD.isPDF: + try: + oPlugin = cPlugin(oPDFiD) + except Exception as e: + Print('Error instantiating plugin: %s' % cPlugin.name, options) + if options.verbose: + raise e + return + + try: + score = oPlugin.Score() + except Exception as e: + Print('Error running plugin: %s' % cPlugin.name, options) + if options.verbose: + raise e + return + + if options.csv: + if score >= options.minimumscore: + Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%.02f', score))), options) + else: + if score >= options.minimumscore: + Print(PDFiD2String(xmlDoc, options.force), options) + Print('%s score: %.02f' % (cPlugin.name, score), options) + else: + if 
options.csv: + if oPDFiD.errorOccured: + Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Error occured'))), options) + if not oPDFiD.isPDF: + Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Not a PDF document'))), options) + else: + Print(PDFiD2String(xmlDoc, options.force), options) + + +def Scan(directory, options, plugins): + try: + if os.path.isdir(directory): + for entry in os.listdir(directory): + Scan(os.path.join(directory, entry), options, plugins) + else: + ProcessFile(directory, options, plugins) + except Exception as e: +# print directory + print(e) +# print(sys.exc_info()[2]) +# print traceback.format_exc() + +#function derived from: http://blog.9bplus.com/pdfidpy-output-to-json +def PDFiD2JSON(xmlDoc, force): + #Get Top Layer Data + errorOccured = xmlDoc.documentElement.getAttribute('ErrorOccured') + errorMessage = xmlDoc.documentElement.getAttribute('ErrorMessage') + filename = xmlDoc.documentElement.getAttribute('Filename') + header = xmlDoc.documentElement.getAttribute('Header') + isPdf = xmlDoc.documentElement.getAttribute('IsPDF') + version = xmlDoc.documentElement.getAttribute('Version') + entropy = xmlDoc.documentElement.getAttribute('Entropy') + + #extra data + countEof = xmlDoc.documentElement.getAttribute('CountEOF') + countChatAfterLastEof = xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF') + totalEntropy = xmlDoc.documentElement.getAttribute('TotalEntropy') + streamEntropy = xmlDoc.documentElement.getAttribute('StreamEntropy') + nonStreamEntropy = xmlDoc.documentElement.getAttribute('NonStreamEntropy') + + keywords = [] + dates = [] + + #grab all keywords + for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes: + name = node.getAttribute('Name') + count = int(node.getAttribute('Count')) + if int(node.getAttribute('HexcodeCount')) > 0: + hexCount = int(node.getAttribute('HexcodeCount')) + else: + hexCount = 0 + keyword = { 'count':count, 'hexcodecount':hexCount, 
'name':name } + keywords.append(keyword) + + #grab all date information + for node in xmlDoc.documentElement.getElementsByTagName('Dates')[0].childNodes: + name = node.getAttribute('Name') + value = node.getAttribute('Value') + date = { 'name':name, 'value':value } + dates.append(date) + + data = { 'countEof':countEof, 'countChatAfterLastEof':countChatAfterLastEof, 'totalEntropy':totalEntropy, 'streamEntropy':streamEntropy, 'nonStreamEntropy':nonStreamEntropy, 'errorOccured':errorOccured, 'errorMessage':errorMessage, 'filename':filename, 'header':header, 'isPdf':isPdf, 'version':version, 'entropy':entropy, 'keywords': { 'keyword': keywords }, 'dates': { 'date':dates} } + complete = [ { 'pdfid' : data} ] + result = json.dumps(complete) + return result + +def File2Strings(filename): + try: + f = open(filename, 'r') + except: + return None + try: + return list(map(lambda line:line.rstrip('\n'), f.readlines())) + except: + return None + finally: + f.close() + +def ProcessAt(argument): + if argument.startswith('@'): + strings = File2Strings(argument[1:]) + if strings == None: + raise Exception('Error reading %s' % argument) + else: + return strings + else: + return [argument] + +def AddPlugin(cClass): + global plugins + + plugins.append(cClass) + +def ExpandFilenameArguments(filenames): + return list(collections.OrderedDict.fromkeys(sum(map(glob.glob, sum(map(ProcessAt, filenames), [])), []))) + +class cPluginParent(): + onlyValidPDF = True + +def LoadPlugins(plugins, verbose): + if plugins == '': + return + scriptPath = os.path.dirname(sys.argv[0]) + for plugin in sum(map(ProcessAt, plugins.split(',')), []): + try: + if not plugin.lower().endswith('.py'): + plugin += '.py' + if os.path.dirname(plugin) == '': + if not os.path.exists(plugin): + scriptPlugin = os.path.join(scriptPath, plugin) + if os.path.exists(scriptPlugin): + plugin = scriptPlugin + exec(open(plugin, 'r').read()) + except Exception as e: + print('Error loading plugin: %s' % plugin) + if verbose: + 
raise e + +def PDFiDMain(filenames, options): + global plugins + plugins = [] + LoadPlugins(options.plugins, options.verbose) + + if options.csv: + if plugins != []: + Print(MakeCSVLine((('%s', 'Filename'), ('%s', 'Plugin-name'), ('%s', 'Score'))), options) + elif options.select != '': + Print('Filename', options) + + for filename in filenames: + if options.scan: + Scan(filename, options, plugins) + else: + ProcessFile(filename, options, plugins) + +def Main(): + moredesc = ''' + +Arguments: +pdf-file and zip-file can be a single file, several files, and/or @file +@file: run PDFiD on each file listed in the text file specified +wildcards are supported + +Source code put in the public domain by Didier Stevens, no Copyright +Use at your own risk +https://DidierStevens.com''' + + oParser = optparse.OptionParser(usage='usage: %prog [options] [pdf-file|zip-file|url|@file] ...\n' + __description__ + moredesc, version='%prog ' + __version__) + oParser.add_option('-s', '--scan', action='store_true', default=False, help='scan the given directory') + oParser.add_option('-a', '--all', action='store_true', default=False, help='display all the names') + oParser.add_option('-e', '--extra', action='store_true', default=False, help='display extra data, like dates') + oParser.add_option('-f', '--force', action='store_true', default=False, help='force the scan of the file, even without proper %PDF header') + oParser.add_option('-d', '--disarm', action='store_true', default=False, help='disable JavaScript and auto launch') + oParser.add_option('-p', '--plugins', type=str, default='', help='plugins to load (separate plugins with a comma , ; @file supported)') + oParser.add_option('-c', '--csv', action='store_true', default=False, help='output csv data when using plugins') + oParser.add_option('-m', '--minimumscore', type=float, default=0.0, help='minimum score for plugin results output') + oParser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose (will 
also raise catched exceptions)') + oParser.add_option('-S', '--select', type=str, default='', help='selection expression') + oParser.add_option('-o', '--output', type=str, default='', help='output to log file') + (options, args) = oParser.parse_args() + + if len(args) == 0: + if options.disarm: + print('Option disarm not supported with stdin') + options.disarm = False + if options.scan: + print('Option scan not supported with stdin') + options.scan = False + filenames = [''] + else: + try: + filenames = ExpandFilenameArguments(args) + except Exception as e: + print(e) + return + PDFiDMain(filenames, options) + +if __name__ == '__main__': + Main() diff --git a/fs_filecheck/usr/local/bin/plugin_embeddedfile.py b/fs_filecheck/usr/local/bin/plugin_embeddedfile.py new file mode 100644 index 0000000..2d68bcb --- /dev/null +++ b/fs_filecheck/usr/local/bin/plugin_embeddedfile.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +#2014/10/13 + +class cPDFiDEmbeddedFile(cPluginParent): +# onlyValidPDF = True + name = 'EmbeddedFile plugin' + + def __init__(self, oPDFiD): + self.oPDFiD = oPDFiD + + def Score(self): + if '/EmbeddedFile' in self.oPDFiD.keywords and self.oPDFiD.keywords['/EmbeddedFile'].count > 0: + if self.oPDFiD.keywords['/EmbeddedFile'].hexcode > 0: + return 1.0 + else: + return 0.9 + else: + return 0.0 + +AddPlugin(cPDFiDEmbeddedFile) diff --git a/fs_filecheck/usr/local/bin/plugin_list b/fs_filecheck/usr/local/bin/plugin_list new file mode 100644 index 0000000..489ac9a --- /dev/null +++ b/fs_filecheck/usr/local/bin/plugin_list @@ -0,0 +1,3 @@ +plugin_embeddedfile.py +plugin_nameobfuscation.py +plugin_triage.py diff --git a/fs_filecheck/usr/local/bin/plugin_nameobfuscation.py b/fs_filecheck/usr/local/bin/plugin_nameobfuscation.py new file mode 100644 index 0000000..e116da2 --- /dev/null +++ b/fs_filecheck/usr/local/bin/plugin_nameobfuscation.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python + +#2013/11/04 +#2013/11/08 + +class cPDFiDNameObfuscation(cPluginParent): +# 
onlyValidPDF = True + name = 'Name Obfuscation plugin' + + def __init__(self, oPDFiD): + self.oPDFiD = oPDFiD + + def Score(self): + if sum([oCount.hexcode for oCount in self.oPDFiD.keywords.values()]) > 0: + return 1.0 + else: + return 0.0 + +AddPlugin(cPDFiDNameObfuscation) diff --git a/fs_filecheck/usr/local/bin/plugin_triage.py b/fs_filecheck/usr/local/bin/plugin_triage.py new file mode 100644 index 0000000..8245c6f --- /dev/null +++ b/fs_filecheck/usr/local/bin/plugin_triage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +#2014/09/30 + +class cPDFiDTriage(cPluginParent): +# onlyValidPDF = True + name = 'Triage plugin' + + def __init__(self, oPDFiD): + self.oPDFiD = oPDFiD + + def Score(self): + for keyword in ('/ObjStm', '/JS', '/JavaScript', '/AA', '/OpenAction', '/AcroForm', '/JBIG2Decode', '/RichMedia', '/Launch', '/EmbeddedFile', '/XFA', '/Colors > 2^24'): + if keyword in self.oPDFiD.keywords and self.oPDFiD.keywords[keyword].count > 0: + return 1.0 + if self.oPDFiD.keywords['obj'].count != self.oPDFiD.keywords['endobj'].count: + return 1.0 + if self.oPDFiD.keywords['stream'].count != self.oPDFiD.keywords['endstream'].count: + return 1.0 + return 0.0 + +AddPlugin(cPDFiDTriage) diff --git a/mount_image.sh b/mount_image.sh index 5d76f23..a5f4978 100755 --- a/mount_image.sh +++ b/mount_image.sh @@ -28,14 +28,14 @@ set -e set -x # If you use a partition... 
-#PARTITION_ROOTFS='/dev/mmcblk0p2' -#PARTITION_BOOT='/dev/mmcblk0p1' -PARTITION_ROOTFS='/dev/sdd2' -PARTITION_BOOT='/dev/sdd1' +PARTITION_ROOTFS='/dev/mmcblk0p2' +PARTITION_BOOT='/dev/mmcblk0p1' +#PARTITION_ROOTFS='/dev/sdd2' +#PARTITION_BOOT='/dev/sdd1' # If you use the img ##### Debian -IMAGE='raspbian-wheezy.img' +IMAGE='2015-11-06-CIRCLean.img' OFFSET_ROOTFS=$((122880 * 512)) OFFSET_BOOT=$((8192 * 512)) ##### Arch diff --git a/tests/run.sh b/tests/run.sh index b828e2d..5c1ba43 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -2,22 +2,6 @@ # http://pub.phyks.me/respawn/mypersonaldata/public/2014-05-20-11-08-01/ -# To make debugging easier -echo "KittenGroomer: in tests/run.sh" 1>&2 - -if [ -z "$1" ]; then - echo "Please tell me which partition type to test." - echo "VFAT_NORM VFAT_PART NTPS_NORM EXT2 EXT3 EXT4" - exit -fi -if [ -z "$2" ]; then - echo "Please tell me which file type to test." - echo "t_images1" - exit -fi -TEST_PART_TYPE=${1} -TEST_SOURCE_TYPE=${2} - IMAGE='../raspbian-wheezy.img' OFFSET_ROOTFS=$((122880 * 512))