Merge remote-tracking branch 'refs/remotes/CIRCL/master'

pull/37/head
Eleanor Saitta 2015-12-08 16:18:59 +00:00
commit 3d43c32808
28 changed files with 1535 additions and 21 deletions

86
README_filecheck.md Normal file
View File

@ -0,0 +1,86 @@
Install Qemu and Expect
============
Install the necessary packages:
```
sudo apt-get install qemu qemu-user-static expect
```
Create a new image from scratch
===============================
* Download the most recent Raspbian version:
http://downloads.raspberrypi.org/raspbian_latest
* Unpack it:
```
unzip 2015-05-05-raspbian-wheezy.zip
mv 2015-05-05-raspbian-wheezy.img raspbian-wheezy.img
```
Prepare the image
=================
It will be used for the build environment and the final image.
* [Add empty space to the image](resize_img.md)
* Chroot in the image
```
sudo ./proper_chroot.sh
```
* Change your user to root (your global variables may be broken)
```
su root
```
* The locales may be broken, fix it (remove `en_GB.UTF-8 UTF-8`, set `en_US.UTF-8 UTF-8`):
```
dpkg-reconfigure locales
```
* In the image, make sure everything is up-to-date, and remove the old packages
```
apt-get update
apt-get dist-upgrade
apt-get autoremove
apt-get install p7zip-full python-dev libxml2-dev libxslt1-dev pmount
```
* Install python requirements
```
pip install lxml
pip install oletools olefile
pip install officedissector
pip install git+https://github.com/Rafiot/python-magic.git@travis
pip install git+https://github.com/CIRCL/PyCIRCLean.git
```
* Create the user and mtab for a RO filesystem
```
useradd -m kitten
chown -R kitten:kitten /home/kitten
ln -s /proc/mounts /etc/mtab
```
* Copy the files
```
sudo ./copy_to_final.sh /mnt/arm_rPi/
```
* Enable rc.local
```
systemctl enable rc-local.service
```

View File

@ -16,7 +16,8 @@ Create a new image from scratch
* Unpack it:
```
unzip 2015-02-16-raspbian-wheezy.zip
unzip 2015-05-05-raspbian-wheezy.zip
mv 2015-05-05-raspbian-wheezy.img raspbian-wheezy.img
```
Prepare the base image
@ -26,12 +27,6 @@ It will be used for the build environment and the final image.
* [Add empty space to the image](resize_img.md)
* Edit `mount_image.sh` and change the `IMAGE` variable accordingly
```
IMAGE='2015-02-16-raspbian-wheezy.img'
```
* Chroot in the image
```
@ -64,8 +59,8 @@ Setup two images
* Create two separate images: one will be used to build the deb packages that are not available in wheezy
```
mv 2015-02-16-raspbian-wheezy.img BUILDENV_2015-02-16-raspbian-wheezy.img
cp BUILDENV_2015-02-16-raspbian-wheezy.img FINAL_2015-02-16-raspbian-wheezy.img
mv raspbian-wheezy.img BUILDENV-raspbian-wheezy.img
cp BUILDENV-raspbian-wheezy.img FINAL-raspbian-wheezy.img
```
Build environment specifics
@ -74,7 +69,7 @@ Build environment specifics
* Create a symlink to the build image
```
ln -s BUILDENV_2015-02-16-raspbian-wheezy.img 2015-02-16-raspbian-wheezy.img
ln -s BUILDENV-raspbian-wheezy.img raspbian-wheezy.img
```
* Chroot in the image
@ -147,8 +142,8 @@ Final image specifics
* Change the link to the image
```
rm 2015-02-16-raspbian-wheezy.img
ln -s FINAL_2015-02-16-raspbian-wheezy.img 2015-02-16-raspbian-wheezy.img
rm raspbian-wheezy.img
ln -s FINAL-raspbian-wheezy.img raspbian-wheezy.img
```
* Chroot in the image
@ -210,7 +205,7 @@ Write the image on a SD card
*WARNING*: Make sure you write on the right filesystem
```
sudo dd bs=4M if=FINAL_2015-02-16-raspbian-wheezy.img of=/dev/<FILESYSTEM>
sudo dd bs=4M if=FINAL-raspbian-wheezy.img of=/dev/<FILESYSTEM>
```
Run the tests

View File

@ -19,7 +19,7 @@ fi
#cp deb/*.deb ${CHROOT_PATH}/
# prepare fs archive
tar -cvpzf backup.tar.gz -C fs/ .
tar -cvpzf backup.tar.gz -C fs_filecheck/ .
tar -xzf backup.tar.gz -C ${CHROOT_PATH}/
chown root:root ${CHROOT_PATH}/etc/sudoers
if [ -f deb/led ]; then

View File

@ -0,0 +1 @@
SUBSYSTEM=="hidraw", DRIVERS=="usbhid", RUN+="/bin/bash -c 'cd /sys/devices/platform/bcm2708_usb/usb1/1-1 ; for d in $(ls -d 1-1.[2-5]); do if [ $(ls -lR $d | grep -c usbhid) -gt 0 ] ; then echo 0 > $d/authorized ; fi ; done'"

6
fs_filecheck/etc/fstab Normal file
View File

@ -0,0 +1,6 @@
proc /proc proc defaults 0 0
/dev/mmcblk0p1 /boot vfat ro,defaults 0 0
/dev/mmcblk0p2 / ext4 ro,defaults,noatime 0 0
tmpfs /tmp tmpfs rw,size=64M,noexec,nodev,nosuid,mode=1777 0 0
tmpfs /media tmpfs rw,size=64M,noexec,nodev,nosuid,mode=1777 0 0
# a swapfile is not a swap partition, so no using swapon|off from here on, use dphys-swapfile swap[on|off] for that

54
fs_filecheck/etc/group Normal file
View File

@ -0,0 +1,54 @@
root:x:0:
daemon:x:1:
bin:x:2:
sys:x:3:
adm:x:4:pi
tty:x:5:
disk:x:6:
lp:x:7:
mail:x:8:
news:x:9:
uucp:x:10:
man:x:12:
proxy:x:13:
kmem:x:15:
dialout:x:20:pi
fax:x:21:
voice:x:22:
cdrom:x:24:pi
floppy:x:25:
tape:x:26:
sudo:x:27:pi
audio:x:29:pi
dip:x:30:
www-data:x:33:
backup:x:34:
operator:x:37:
list:x:38:
irc:x:39:
src:x:40:
gnats:x:41:
shadow:x:42:
utmp:x:43:
video:x:44:pi
sasl:x:45:
plugdev:x:46:pi,kitten
staff:x:50:
games:x:60:pi
users:x:100:pi
nogroup:x:65534:
libuuid:x:101:
crontab:x:102:
pi:x:1000:
ssh:x:103:
ntp:x:104:
netdev:x:105:pi
input:x:999:pi
messagebus:x:106:
lpadmin:x:107:
fuse:x:108:
lightdm:x:109:
indiecity:x:1001:root
spi:x:1002:pi
gpio:x:1003:pi
kitten:x:1004:

61
fs_filecheck/etc/pam.d/su Normal file
View File

@ -0,0 +1,61 @@
#
# The PAM configuration file for the Shadow `su' service
#
# This allows root to su without passwords (normal operation)
auth sufficient pam_rootok.so
# Uncomment this to force users to be a member of group root
# before they can use `su'. You can also add "group=foo"
# to the end of this line if you want to use a group other
# than the default "root" (but this may have side effect of
# denying "root" user, unless she's a member of "foo" or explicitly
# permitted earlier by e.g. "sufficient pam_rootok.so").
# (Replaces the `SU_WHEEL_ONLY' option from login.defs)
# auth required pam_wheel.so
# Uncomment this if you want wheel members to be able to
# su without a password.
# auth sufficient pam_wheel.so trust
# Uncomment this if you want members of a specific group to not
# be allowed to use su at all.
# auth required pam_wheel.so deny group=nosu
# Uncomment and edit /etc/security/time.conf if you need to set
# time restraints on su usage.
# (Replaces the `PORTTIME_CHECKS_ENAB' option from login.defs
# as well as /etc/porttime)
# account requisite pam_time.so
# This module parses environment configuration file(s)
# and also allows you to use an extended config
# file /etc/security/pam_env.conf.
#
# parsing /etc/environment needs "readenv=1"
session required pam_env.so readenv=1
# locale variables are also kept into /etc/default/locale in etch
# reading this file *in addition to /etc/environment* does not hurt
session required pam_env.so readenv=1 envfile=/etc/default/locale
# Defines the MAIL environment variable
# However, userdel also needs MAIL_DIR and MAIL_FILE variables
# in /etc/login.defs to make sure that removing a user
# also removes the user's mail spool file.
# See comments in /etc/login.defs
#
# "nopen" stands to avoid reporting new mail when su'ing to another user
session optional pam_mail.so nopen
# Sets up user limits according to /etc/security/limits.conf
# (Replaces the use of /etc/limits in old login)
#session required pam_limits.so
# The standard Unix authentication modules, used with
# NIS (man nsswitch) as well as normal /etc/passwd and
# /etc/shadow entries.
@include common-auth
@include common-account
@include common-session

30
fs_filecheck/etc/passwd Normal file
View File

@ -0,0 +1,30 @@
root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin
sync:x:4:65534:sync:/bin:/bin/sync
games:x:5:60:games:/usr/games:/usr/sbin/nologin
man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
mail:x:8:8:mail:/var/mail:/usr/sbin/nologin
news:x:9:9:news:/var/spool/news:/usr/sbin/nologin
uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin
proxy:x:13:13:proxy:/bin:/usr/sbin/nologin
www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin
backup:x:34:34:backup:/var/backups:/usr/sbin/nologin
list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin
irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin
gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin
nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
systemd-timesync:x:100:103:systemd Time Synchronization,,,:/run/systemd:/bin/false
systemd-network:x:101:104:systemd Network Management,,,:/run/systemd/netif:/bin/false
systemd-resolve:x:102:105:systemd Resolver,,,:/run/systemd/resolve:/bin/false
systemd-bus-proxy:x:103:106:systemd Bus Proxy,,,:/run/systemd:/bin/false
pi:x:1000:1000:,,,:/home/pi:/bin/false
sshd:x:104:65534::/var/run/sshd:/usr/sbin/nologin
messagebus:x:105:110::/var/run/dbus:/bin/false
avahi:x:106:111:Avahi mDNS daemon,,,:/var/run/avahi-daemon:/bin/false
ntp:x:107:112::/home/ntp:/bin/false
statd:x:108:65534::/var/lib/nfs:/bin/false
lightdm:x:109:114:Light Display Manager:/var/lib/lightdm:/bin/false
kitten:x:1001:1004::/home/kitten:/bin/bash

View File

@ -0,0 +1,5 @@
# /etc/pmount.allow
# pmount will allow users to additionally mount all devices that are
# listed here.
/dev/sdb1
/dev/sda*

View File

@ -0,0 +1,19 @@
#!/bin/sh
# Part of raspi-config http://github.com/asb/raspi-config
#
# See LICENSE file for copyright and license details
# Should be installed to /etc/profile.d/raspi-config.sh to force raspi-config
# to run at initial login
# You may also want to set automatic login in /etc/inittab on tty1 by adding a
# line such as:
# 1:2345:respawn:/bin/login -f root tty1 </dev/tty1 >/dev/tty1 2>&1 # RPICFG_TO_DISABLE
# Non-root shells only get a notice; a root shell is replaced by a login
# session for user "pi" (raspi-config itself is deliberately commented out).
if [ $(id -u) -ne 0 ]; then
printf "\nNOTICE: the software on this Raspberry Pi has not been fully configured. Please run 'sudo raspi-config'\n\n"
else
# Disable raspi-config at the first run.
# raspi-config
# NOTE(review): "-f" presumably skips authentication for pi -- confirm against login(1).
exec login -f pi
fi

36
fs_filecheck/etc/rc.local Executable file
View File

@ -0,0 +1,36 @@
#!/bin/sh -e
#
# rc.local
#
# This script is executed at the end of each multiuser runlevel.
# Make sure that the script will "exit 0" on success or any other
# value on error.
#
# In order to enable or disable this script just change the execution
# bits.
#
# When both /dev/sda and /dev/sdb exist, this script takes eth0 down, starts
# /usr/sbin/led in the background and runs /opt/groomer/init.sh; the exit
# trap installed in that branch then powers the machine off.
# Otherwise it only prints the IP address (if any) and exits.

# Exit/signal handler: announce completion and power the machine off.
clean(){
echo 'Rc Local done, quit.'
/sbin/shutdown -P -h now
}
# Print the IP address
# "|| true" keeps the -e shell alive when hostname fails.
_IP=$(hostname -I) || true
if [ "$_IP" ]; then
printf "My IP address is %s\n" "$_IP"
fi
# Only start grooming when both device nodes are present.
if [ -e /dev/sda ]; then
if [ -e /dev/sdb ]; then
# avoid possible misuse
/sbin/ifconfig eth0 down
# From here on, leaving the script (normally or on TERM/INT) shuts down.
trap clean EXIT TERM INT
cd /opt/groomer
/usr/sbin/led &
./init.sh
fi
fi
exit 0

View File

@ -0,0 +1 @@
kitten hard priority -20

28
fs_filecheck/etc/sudoers Normal file
View File

@ -0,0 +1,28 @@
#
# This file MUST be edited with the 'visudo' command as root.
#
# Please consider adding local content in /etc/sudoers.d/ instead of
# directly modifying this file.
#
# See the man page for details on how to write a sudoers file.
#
Defaults env_reset
Defaults mail_badpass
Defaults secure_path="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
# Host alias specification
# User alias specification
# Cmnd alias specification
# User privilege specification
#root ALL=(ALL:ALL) ALL
# Allow members of group sudo to execute any command
#%sudo ALL=(ALL:ALL) ALL
# See sudoers(5) for more information on "#include" directives:
#includedir /etc/sudoers.d
#pi ALL=(ALL) NOPASSWD: ALL

View File

@ -0,0 +1,12 @@
[Unit]
Description=/etc/rc.local Compatibility
[Service]
Type=oneshot
ExecStart=/etc/rc.local
TimeoutSec=0
StandardInput=tty
RemainAfterExit=yes
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1 @@
SUBSYSTEM=="hidraw", DRIVERS=="usbhid", RUN+="/bin/bash -c 'cd /sys/devices/platform/bcm2708_usb/usb1/1-1 ; for d in $(ls -d 1-1.[2-5]); do if [ $(ls -lR $d | grep -c usbhid) -gt 0 ] ; then echo 0 > $d/authorized ; fi ; done'"

View File

@ -0,0 +1,2 @@
KERNEL=="sdc", SYMLINK+="mmcblk0"
KERNEL=="sdc?", SYMLINK+="mmcblk0p%n",

View File

@ -0,0 +1,23 @@
# Shared settings, meant to be sourced by the other scripts in this directory.

# Source device, given as a full device path.
DEV_SRC='/dev/sda'
# Destination partition, given WITHOUT the /dev/ prefix.
DEV_DST='sdb1'
# User allowed to do the following commands without password
USERNAME='kitten'
# Directory prefix of the music files (note the trailing slash).
MUSIC="/opt/midi/"
# Numeric uid of the user sourcing this file.
ID=`/usr/bin/id -u`
# Paths used in multiple scripts
# SRC and DST are mount labels; the working directories below live under /media/${DST}.
SRC="src"
DST="dst"
TEMP="/media/${DST}/temp"
ZIPTEMP="/media/${DST}/ziptemp"
LOGS="/media/${DST}/logs"
# commands
SYNC='/bin/sync'
TIMIDITY='/usr/bin/timidity'
MOUNT='/bin/mount'
# NOTE(review): meaning of the pmount flags -A and -s is not visible here -- see pmount(1).
PMOUNT='/usr/bin/pmount -A -s'
PUMOUNT='/usr/bin/pumount'

View File

@ -0,0 +1,112 @@
#!/bin/bash
# groomer.sh - copy every partition of the source device (${DEV_SRC}) to the
# destination partition (/dev/${DEV_DST}) through filecheck.py.
#
# Must run as an unprivileged user (uid >= 1000). Device names, paths and
# command wrappers come from ./constraint.sh.

set -e
set -x

source ./constraint.sh

if ! [ "${ID}" -ge "1000" ]; then
    echo "This script cannot run as root."
    exit
fi

# Exit/signal handler: flush buffers, unmount the source, remove the
# temporary directories on the destination and unmount it.
clean(){
    # This handler ends with `exit`; when it was entered through TERM/INT that
    # would fire the EXIT trap and run the cleanup a second time.
    trap - EXIT TERM INT
    echo Cleaning.
    ${SYNC}

    # `set -e` is still in force here, so every step is made best-effort:
    # a failing step (e.g. the source was never mounted) must not prevent the
    # destination from being cleaned and unmounted.

    # Cleanup source
    ${PUMOUNT} ${SRC} || true

    # Cleanup destination
    rm -rf "${TEMP}" || true
    rm -rf "${ZIPTEMP}" || true
    ${PUMOUNT} ${DST} || true

    exit
}

trap clean EXIT TERM INT

# Do we have a source device
if [ ! -b ${DEV_SRC} ]; then
    echo "Source device (${DEV_SRC}) does not exists."
    exit
fi

# Find the partition names on the source device
DEV_PARTITIONS=`ls "${DEV_SRC}"* | grep "${DEV_SRC}[1-9][0-6]*" || true`
if [ -z "${DEV_PARTITIONS}" ]; then
    echo "${DEV_SRC} does not have any partitions."
    exit
fi

# Do we have a destination device
if [ ! -b "/dev/${DEV_DST}" ]; then
    echo "Destination device (/dev/${DEV_DST}) does not exists."
    exit
fi

# mount and prepare destination device
if ${MOUNT}|grep ${DST}; then
    ${PUMOUNT} ${DST} || true
fi

# uid= only works on a vfat FS. What should we do if we get an ext* FS ?
# The mount is tested directly in the `if`: with `set -e` a separate `$?`
# check is never reached when the command fails.
if ! ${PMOUNT} -w ${DEV_DST} ${DST}; then
    echo "Unable to mount /dev/${DEV_DST} on /media/${DST}"
    exit
else
    echo "Target USB device (/dev/${DEV_DST}) mounted at /media/${DST}"

    rm -rf "/media/${DST}/FROM_PARTITION_"*

    # prepare temp dirs and make sure it's empty
    mkdir -p "${TEMP}"
    mkdir -p "${ZIPTEMP}"
    mkdir -p "${LOGS}"

    rm -rf "${TEMP}/"*
    rm -rf "${ZIPTEMP}/"*
    rm -rf "${LOGS}/"*
fi

# Groom da kitteh!

# Find the FS types
# lsblk -n -o name,fstype,mountpoint,label,uuid -r

PARTCOUNT=1
for partition in ${DEV_PARTITIONS}
do
    # Processing a partition
    echo "Processing partition: ${partition}"
    if [ `${MOUNT} | grep -c ${SRC}` -ne 0 ]; then
        ${PUMOUNT} ${SRC}
    fi

    # First pass, read-write: rename any autorun.inf so it is flagged as
    # dangerous, then unmount again.
    ${PMOUNT} -w ${partition} ${SRC}
    ls "/media/${SRC}" | grep -i autorun.inf | xargs -I {} mv "/media/${SRC}"/{} "/media/${SRC}"/DANGEROUS_{}_DANGEROUS || true
    ${PUMOUNT} ${SRC}

    # Second pass, read-only. A failure here only skips this partition.
    if ! ${PMOUNT} -r ${partition} ${SRC}; then
        echo "Unable to mount ${partition} on /media/${SRC}"
    else
        echo "${partition} mounted at /media/${SRC}"

        # Print the filenames on the current partition in a logfile
        find "/media/${SRC}" -fls "${LOGS}/Content_partition_${PARTCOUNT}.txt"

        # create a directory on ${DST} named FROM_PARTITION_${PARTCOUNT}
        target_dir="/media/${DST}/FROM_PARTITION_${PARTCOUNT}"
        echo "copying to: ${target_dir}"
        mkdir -p "${target_dir}"
        LOGFILE="${LOGS}/processing.txt"

        echo "==== Starting processing of /media/${SRC} to ${target_dir}. ====" >> "${LOGFILE}"
        filecheck.py --source "/media/${SRC}" --destination "${target_dir}" || true
        echo "==== Done with /media/${SRC} to ${target_dir}. ====" >> "${LOGFILE}"

        ls -lR "${target_dir}"
    fi
    PARTCOUNT=$((PARTCOUNT + 1))
done

# The cleanup is automatically done in the function clean called when
# the program quits

View File

@ -0,0 +1,26 @@
#!/bin/bash
# init.sh - root-side entry point: starts the background music, then runs
# groomer.sh as the unprivileged user named in constraint.sh.
set -e
set -x
source ./constraint.sh
# ID is the numeric uid captured by constraint.sh; anything but 0 is refused.
if [ ${ID} -ne 0 ]; then
echo "This script has to be run as root."
exit
fi
# Exit/signal handler: flush buffers, kill the music player whose pid was
# stored in /tmp/music.pid, and remove that pid file.
clean(){
echo Done, cleaning.
${SYNC}
kill -9 $(cat /tmp/music.pid)
rm -f /tmp/music.pid
}
trap clean EXIT TERM INT
# Start the music in the background and remember its pid for clean().
./music.sh &
echo $! > /tmp/music.pid
# Run the actual grooming as ${USERNAME}.
su ${USERNAME} -c ./groomer.sh

View File

@ -0,0 +1,21 @@
#!/bin/bash
# music.sh - play randomly chosen files matching ${MUSIC}* in an endless loop.
set -e
#set -x
source ./constraint.sh
# Exit/signal handler: only reports that playback stopped.
killed(){
echo 'Music stopped.'
}
trap killed EXIT TERM INT
# Force output on analog
amixer cset numid=3 1
# Expand the glob once into an array of candidate files.
files=(${MUSIC}*)
# Pick a random array element on every iteration and hand it to the player.
while true; do
$TIMIDITY ${files[RANDOM % ${#files[@]}]}
done

View File

@ -0,0 +1,930 @@
#!/usr/bin/env python
__description__ = 'Tool to test a PDF file'
__author__ = 'Didier Stevens'
__version__ = '0.2.1'
__date__ = '2014/10/18'
"""
Tool to test a PDF file
Source code put in public domain by Didier Stevens, no Copyright
https://DidierStevens.com
Use at your own risk
History:
2009/03/27: start
2009/03/28: scan option
2009/03/29: V0.0.2: xml output
2009/03/31: V0.0.3: /ObjStm suggested by Dion
2009/04/02: V0.0.4: added ErrorMessage
2009/04/20: V0.0.5: added Dates
2009/04/21: V0.0.6: added entropy
2009/04/22: added disarm
2009/04/29: finished disarm
2009/05/13: V0.0.7: added cPDFEOF
2009/07/24: V0.0.8: added /AcroForm and /RichMedia, simplified %PDF header regex, extra date format (without TZ)
2009/07/25: added input redirection, option --force
2009/10/13: V0.0.9: added detection for CVE-2009-3459; added /RichMedia to disarm
2010/01/11: V0.0.10: relaxed %PDF header checking
2010/04/28: V0.0.11: added /Launch
2010/09/21: V0.0.12: fixed cntCharsAfterLastEOF bug; fix by Russell Holloway
2011/12/29: updated for Python 3, added keyword /EmbeddedFile
2012/03/03: added PDFiD2JSON; coded by Brandon Dixon
2013/02/10: V0.1.0: added http/https support; added support for ZIP file with password 'infected'
2013/03/11: V0.1.1: fixes for Python 3
2013/03/13: V0.1.2: Added error handling for files; added /XFA
2013/11/01: V0.2.0: Added @file & plugins
2013/11/02: continue
2013/11/04: added options -c, -m, -v
2013/11/06: added option -S
2013/11/08: continue
2013/11/09: added option -o
2013/11/15: refactoring
2014/09/30: added CSV header
2014/10/16: V0.2.1: added output when plugin & file not pdf
2014/10/18: some fixes for Python 3
Todo:
- update XML example (entropy, EOF)
- code review, cleanup
"""
import optparse
import os
import re
import xml.dom.minidom
import traceback
import math
import operator
import os.path
import sys
import json
import zipfile
import collections
import glob
try:
import urllib2
urllib23 = urllib2
except:
import urllib.request
urllib23 = urllib.request
#Convert 2 Bytes If Python 3
def C2BIP3(string):
    """Convert 2 Bytes If Python 3.

    On Python 2 the argument is returned untouched. On Python 3 each
    character is mapped to the byte with the same ordinal value.
    """
    if sys.version_info[0] <= 2:
        return string
    return bytes(list(map(ord, string)))
class cBinaryFile:
    """Byte-oriented reader with push-back.

    The input is stdin (file == ''), an http/https URL, the first member of
    a ZIP archive opened with the password 'infected', or a regular file.
    Pushed-back bytes are kept in ``self.ungetted`` as a stack: the byte that
    must be delivered next is the LAST element of the list.
    """

    def __init__(self, file):
        """Open ``file``; on failure print the error and exit the process."""
        self.file = file
        if file == '':
            self.infile = sys.stdin
        elif file.lower().startswith('http://') or file.lower().startswith('https://'):
            try:
                # urlopen() only accepts a timeout from Python 2.6 final on.
                if sys.hexversion >= 0x020601F0:
                    self.infile = urllib23.urlopen(file, timeout=5)
                else:
                    self.infile = urllib23.urlopen(file)
            except urllib23.HTTPError:
                print('Error accessing URL %s' % file)
                print(sys.exc_info()[1])
                sys.exit()
        elif file.lower().endswith('.zip'):
            try:
                self.zipfile = zipfile.ZipFile(file, 'r')
                self.infile = self.zipfile.open(self.zipfile.infolist()[0], 'r', C2BIP3('infected'))
            except Exception:
                print('Error opening file %s' % file)
                print(sys.exc_info()[1])
                sys.exit()
        else:
            try:
                self.infile = open(file, 'rb')
            except Exception:
                print('Error opening file %s' % file)
                print(sys.exc_info()[1])
                sys.exit()
        self.ungetted = []

    def byte(self):
        """Return the next byte as an int, or None at end of input.

        The underlying stream is closed when end of input is reached.
        """
        if len(self.ungetted) != 0:
            return self.ungetted.pop()
        inbyte = self.infile.read(1)
        if not inbyte:
            self.infile.close()
            return None
        return ord(inbyte)

    def bytes(self, size):
        """Return up to ``size`` upcoming bytes as a list of ints.

        Pushed-back bytes are delivered first, in the same order byte()
        would deliver them (i.e. from the top of the stack downwards).
        """
        pending = len(self.ungetted)
        if size <= pending:
            cut = pending - size
            result = self.ungetted[cut:][::-1]
            del self.ungetted[cut:]
            return result
        inbytes = self.infile.read(size - pending)
        # The stream is intentionally NOT closed on an empty read: byte() may
        # still be called afterwards and closes the stream itself at EOF.
        if isinstance(inbytes, str):
            fresh = [ord(b) for b in inbytes]
        else:
            fresh = [b for b in inbytes]
        result = self.ungetted[::-1] + fresh
        self.ungetted = []
        return result

    def unget(self, byte):
        """Push one byte back; it is the next one byte() returns."""
        self.ungetted.append(byte)

    def ungets(self, bytes):
        """Push a sequence back so that byte() yields it in its given order.

        The caller's sequence is left unmodified.
        """
        self.ungetted.extend(reversed(bytes))
class cPDFDate:
    """Character-at-a-time recogniser for PDF date strings.

    Feed every character of the document to parse(); it returns the date
    string ("D:" + 14 digits, optionally followed by a timezone such as
    +01'00) on the character that completes a date, and None otherwise.

    States: 0 = idle, 1 = seen 'D', 2 = collecting the 14 digits,
    3 = collecting the timezone offset.
    """

    def __init__(self):
        self.state = 0

    def parse(self, char):
        """Consume one character; return a completed date string or None."""
        # A 'D' always restarts recognition, whatever the current state.
        if char == 'D':
            self.state = 1
            return None

        if self.state == 1:
            if char == ':':
                self.state = 2
                self.digits1 = ''
            else:
                self.state = 0
            return None

        if self.state == 2:
            return self._parseTimestamp(char)

        if self.state == 3:
            return self._parseOffset(char)

        return None

    def _parseTimestamp(self, char):
        """State 2: gather the 14 timestamp digits, then decide what follows."""
        isDigit = '0' <= char <= '9'

        if len(self.digits1) < 14:
            if isDigit:
                self.digits1 += char
            else:
                self.state = 0
            return None

        if char in ('+', '-', 'Z'):
            self.state = 3
            self.digits2 = ''
            self.TZ = char
            return None

        # Anything else ends the date: a 15th digit invalidates it, any
        # non-digit terminates a timezone-less date.
        self.state = 0
        if isDigit:
            return None
        self.date = 'D:' + self.digits1
        return self.date

    def _parseOffset(self, char):
        """State 3: gather the offset, two digits, an apostrophe, two digits."""
        collected = len(self.digits2)

        if collected == 2:
            accepted = char == "'"
        elif collected < 5:
            accepted = '0' <= char <= '9'
        else:
            return None

        if not accepted:
            self.state = 0
            return None

        self.digits2 += char
        if len(self.digits2) == 5:
            self.state = 0
            self.date = 'D:' + self.digits1 + self.TZ + self.digits2
            return self.date
        return None
def fEntropy(countByte, countTotal):
    """Return one byte value's contribution to the Shannon entropy (in bits).

    countByte  -- number of occurrences of the byte value
    countTotal -- total number of bytes counted

    Returns 0.0 when the byte value does not occur or when nothing was
    counted at all (countTotal == 0), instead of dividing by zero.
    """
    if countTotal == 0:
        return 0.0
    x = float(countByte) / countTotal
    if x > 0:
        return - x * math.log(x, 2)
    return 0.0


class cEntropy:
    """Per-byte-value histograms for the whole file and for stream content."""

    def __init__(self):
        self.allBucket = [0] * 256
        self.streamBucket = [0] * 256

    def add(self, byte, insideStream):
        """Count ``byte`` (0..255); also count it as stream data if insideStream."""
        self.allBucket[byte] += 1
        if insideStream:
            self.streamBucket[byte] += 1

    def removeInsideStream(self, byte):
        """Take one occurrence of ``byte`` back out of the stream histogram."""
        if self.streamBucket[byte] > 0:
            self.streamBucket[byte] -= 1

    def calc(self):
        """Return (allCount, allEntropy, streamCount, streamEntropy,
        nonStreamCount, nonStreamEntropy)."""
        # Materialise the list: on Python 3 map() is a one-shot iterator and
        # the bucket is traversed twice below (count, then entropy).
        self.nonStreamBucket = list(map(operator.sub, self.allBucket, self.streamBucket))
        allCount = sum(self.allBucket)
        streamCount = sum(self.streamBucket)
        nonStreamCount = sum(self.nonStreamBucket)
        return (
            allCount,
            sum(fEntropy(count, allCount) for count in self.allBucket),
            streamCount,
            sum(fEntropy(count, streamCount) for count in self.streamBucket),
            nonStreamCount,
            sum(fEntropy(count, nonStreamCount) for count in self.nonStreamBucket),
        )
class cPDFEOF:
    """Counts %%EOF markers and the characters following the last one.

    Attributes:
      token                -- the part of a candidate "%%EOF" marker seen so far
      cntEOFs              -- number of markers recognised
      cntCharsAfterLastEOF -- characters parsed since the last marker
                              (0 while no marker has been seen)
    """

    def __init__(self):
        self.token = ''
        self.cntEOFs = 0
        # Initialised here so that readers never hit an AttributeError when
        # the input contains no %%EOF at all.
        self.cntCharsAfterLastEOF = 0

    def parse(self, char):
        """Consume one character of the document."""
        if self.cntEOFs > 0:
            self.cntCharsAfterLastEOF += 1
        if self.token == '' and char == '%':
            self.token += char
            return
        elif self.token == '%' and char == '%':
            self.token += char
            return
        elif self.token == '%%' and char == 'E':
            self.token += char
            return
        elif self.token == '%%E' and char == 'O':
            self.token += char
            return
        elif self.token == '%%EO' and char == 'F':
            self.token += char
            return
        elif self.token == '%%EOF' and (char == '\n' or char == '\r' or char == ' ' or char == '\t'):
            # Marker complete: the whitespace terminating it is not counted.
            self.cntEOFs += 1
            self.cntCharsAfterLastEOF = 0
            if char == '\n':
                self.token = ''
            else:
                self.token += char
            return
        elif self.token == '%%EOF\r':
            # A "\r\n" line end belongs to the marker as a whole.
            if char == '\n':
                self.cntCharsAfterLastEOF = 0
            self.token = ''
        else:
            self.token = ''
def FindPDFHeaderRelaxed(oBinaryFile):
    """Look for a '%PDF' header within the first 1024 bytes of the input.

    oBinaryFile must provide bytes(size) and ungets(list).

    Returns (headerBytes, headerString):
      * ([], None) when no '%PDF' is found; all bytes read are pushed back.
      * otherwise headerBytes is everything up to the end of the header line
        and headerString is the text from '%PDF' to that point; the remaining
        bytes are pushed back.

    The header ends at the first LF/CR found in the 10 bytes after '%PDF'.
    Inputs that stop before that window is full no longer raise IndexError:
    the header then simply extends to the end of the data.
    """
    data = oBinaryFile.bytes(1024)
    index = ''.join([chr(byte) for byte in data]).find('%PDF')
    if index == -1:
        oBinaryFile.ungets(data)
        return ([], None)

    scanStart = index + 4
    scanStop = min(scanStart + 10, len(data))
    if scanStop == scanStart + 10:
        # Full window and no line end: keep the historical cut-off, which is
        # the last byte of the window.
        endHeader = scanStop - 1
    else:
        # Data ran out inside the window: the header runs to the end.
        endHeader = scanStop
    for position in range(scanStart, scanStop):
        if data[position] == 10 or data[position] == 13:
            endHeader = position
            break

    oBinaryFile.ungets(data[endHeader:])
    return (data[0:endHeader], ''.join([chr(byte) for byte in data[index:endHeader]]))
def Hexcode2String(char):
    """Render one name element: ints become '#xx' hex escapes, strings pass through."""
    return '#%02x' % char if type(char) is int else char


def SwapCase(char):
    """Swap the case of one name element, keeping its type (int stays int)."""
    if type(char) is int:
        swapped = chr(char).swapcase()
        return ord(swapped)
    return char.swapcase()


def HexcodeName2String(hexcodeName):
    """Join a sequence of name elements into its textual PDF form."""
    return ''.join(map(Hexcode2String, hexcodeName))


def SwapName(wordExact):
    """Apply SwapCase to every element of a name (result of map())."""
    return map(SwapCase, wordExact)
def UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut):
    """Account for a completed word and reset the per-word parser state.

    word / wordExact -- the decoded word and its exact elements (str or int)
    slash            -- '/' when the word is a PDF name, '' otherwise
    words            -- dict name -> [count, hexcodeCount], updated in place
    hexcode          -- True when the word contained a #xx escape
    allNames         -- also record names that are not yet in ``words``
    lastName         -- most recent name seen so far
    insideStream     -- whether the parser is inside a stream
    oEntropy         -- entropy tracker or None
    fOut             -- disarm output file or None

    Returns ('', [], False, lastName, insideStream): the reset word state
    plus the updated lastName / insideStream.
    """
    if word == '':
        return ('', [], False, lastName, insideStream)

    name = slash + word
    isName = slash == '/'

    if name in words:
        counters = words[name]
        counters[0] += 1
        if hexcode:
            counters[1] += 1
    elif isName and allNames:
        words[name] = [1, 1] if hexcode else [1, 0]

    if isName:
        lastName = name
    elif slash == '':
        if word == 'stream':
            insideStream = True
        elif word == 'endstream':
            # The keyword itself was counted as stream data; take it out again.
            if insideStream == True and oEntropy != None:
                for code in map(ord, 'endstream'):
                    oEntropy.removeInsideStream(code)
            insideStream = False

    if fOut != None:
        if isName and name in ('/JS', '/JavaScript', '/AA', '/OpenAction', '/JBIG2Decode', '/RichMedia', '/Launch'):
            # Disarm: write the name with swapped case so it no longer triggers.
            swapped = HexcodeName2String(SwapName(wordExact))
            fOut.write(C2BIP3(swapped))
            print('/%s -> /%s' % (HexcodeName2String(wordExact), swapped))
        else:
            fOut.write(C2BIP3(HexcodeName2String(wordExact)))

    return ('', [], False, lastName, insideStream)
class cCVE_2009_3459:
    """Counts /Colors values large enough to suggest CVE-2009-3459."""

    # Alert when the number of colors needs more than 3 bytes.
    # Note: this must be a power (2**24); "2^24" is a bitwise XOR and equals 26.
    THRESHOLD = 2 ** 24

    def __init__(self):
        self.count = 0

    def Check(self, lastName, word):
        """Increment ``count`` when ``word`` is the numeric value of a
        /Colors entry and exceeds THRESHOLD."""
        if lastName == '/Colors' and word.isdigit() and int(word) > self.THRESHOLD:
            self.count += 1
def XMLAddAttribute(xmlDoc, name, value=None):
    """Add attribute ``name`` to the root element of ``xmlDoc``.

    ``value`` (if not None) becomes the attribute's value. The created
    attribute node is returned so callers can update its nodeValue later;
    previously nothing was returned, although callers keep the result.
    """
    att = xmlDoc.createAttribute(name)
    xmlDoc.documentElement.setAttributeNode(att)
    if value is not None:
        att.nodeValue = value
    return att
def _AddRootAttribute(xmlDoc, name):
    """Create an (initially empty) attribute on the document root and return it."""
    att = xmlDoc.createAttribute(name)
    xmlDoc.documentElement.setAttributeNode(att)
    return att


def _AppendKeyword(xmlDoc, eleKeywords, name, count, hexcodeCount):
    """Append one Keyword element (Name, Count, HexcodeCount) to eleKeywords."""
    eleKeyword = xmlDoc.createElement('Keyword')
    eleKeywords.appendChild(eleKeyword)
    for attName, attValue in (('Name', name), ('Count', str(count)), ('HexcodeCount', str(hexcodeCount))):
        att = xmlDoc.createAttribute(attName)
        att.nodeValue = attValue
        eleKeyword.setAttributeNode(att)


def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
    """Scan ``file`` for PDF keywords and return the result as an XML document.

    file      -- filename, URL, ZIP archive, or '' for stdin (see cBinaryFile)
    allNames  -- also report every /Name found, not just the fixed keyword list
    extraData -- additionally collect dates, entropy and %%EOF statistics
    disarm    -- write a copy "<name>.disarmed<ext>" in which the case of the
                 active-content names is swapped
    force     -- scan the input even when no %PDF header is found

    Errors raised while scanning are reported through the ErrorOccured /
    ErrorMessage attributes of the root element; SystemExit is re-raised as
    a plain exit. The disarm output file is always closed, including on the
    early "not a PDF" return and on errors.

    Example of XML output:
    <PDFiD ErrorOccured="False" ErrorMessage="" Filename="test.pdf" Header="%PDF-1.1" IsPDF="True" Version="0.0.4" Entropy="4.28">
            <Keywords>
                    <Keyword Count="7" HexcodeCount="0" Name="obj"/>
                    <Keyword Count="7" HexcodeCount="0" Name="endobj"/>
                    <Keyword Count="1" HexcodeCount="0" Name="stream"/>
                    <Keyword Count="1" HexcodeCount="0" Name="endstream"/>
                    <Keyword Count="1" HexcodeCount="0" Name="xref"/>
                    <Keyword Count="1" HexcodeCount="0" Name="trailer"/>
                    <Keyword Count="1" HexcodeCount="0" Name="startxref"/>
                    <Keyword Count="1" HexcodeCount="0" Name="/Page"/>
                    <Keyword Count="0" HexcodeCount="0" Name="/Encrypt"/>
                    <Keyword Count="1" HexcodeCount="0" Name="/JS"/>
                    <Keyword Count="1" HexcodeCount="0" Name="/JavaScript"/>
                    <Keyword Count="0" HexcodeCount="0" Name="/AA"/>
                    <Keyword Count="1" HexcodeCount="0" Name="/OpenAction"/>
                    <Keyword Count="0" HexcodeCount="0" Name="/JBIG2Decode"/>
            </Keywords>
            <Dates>
                    <Date Value="D:20090128132916+01'00" Name="/ModDate"/>
            </Dates>
    </PDFiD>
    """

    word = ''
    wordExact = []
    hexcode = False
    lastName = ''
    insideStream = False
    keywords = ('obj',
                'endobj',
                'stream',
                'endstream',
                'xref',
                'trailer',
                'startxref',
                '/Page',
                '/Encrypt',
                '/ObjStm',
                '/JS',
                '/JavaScript',
                '/AA',
                '/OpenAction',
                '/AcroForm',
                '/JBIG2Decode',
                '/RichMedia',
                '/Launch',
                '/EmbeddedFile',
                '/XFA',
               )
    words = {}
    dates = []
    for keyword in keywords:
        words[keyword] = [0, 0]
    slash = ''
    xmlDoc = xml.dom.minidom.getDOMImplementation().createDocument(None, 'PDFiD', None)
    XMLAddAttribute(xmlDoc, 'Version', __version__)
    XMLAddAttribute(xmlDoc, 'Filename', file)
    XMLAddAttribute(xmlDoc, 'ErrorOccured', 'False')
    XMLAddAttribute(xmlDoc, 'ErrorMessage', '')

    oPDFDate = None
    oEntropy = None
    oPDFEOF = None
    oCVE_2009_3459 = cCVE_2009_3459()
    # Bound before the try block so the cleanup below can always test it.
    fOut = None
    try:
        attIsPDF = _AddRootAttribute(xmlDoc, 'IsPDF')
        oBinaryFile = cBinaryFile(file)
        if extraData:
            oPDFDate = cPDFDate()
            oEntropy = cEntropy()
            oPDFEOF = cPDFEOF()
        (bytesHeader, pdfHeader) = FindPDFHeaderRelaxed(oBinaryFile)
        if disarm:
            (pathfile, extension) = os.path.splitext(file)
            fOut = open(pathfile + '.disarmed' + extension, 'wb')
            for byteHeader in bytesHeader:
                fOut.write(C2BIP3(chr(byteHeader)))
        else:
            fOut = None
        if oEntropy != None:
            for byteHeader in bytesHeader:
                oEntropy.add(byteHeader, insideStream)
        if pdfHeader == None and not force:
            attIsPDF.nodeValue = 'False'
            return xmlDoc
        else:
            if pdfHeader == None:
                attIsPDF.nodeValue = 'False'
                pdfHeader = ''
            else:
                attIsPDF.nodeValue = 'True'
            att = xmlDoc.createAttribute('Header')
            att.nodeValue = repr(pdfHeader[0:10]).strip("'")
            xmlDoc.documentElement.setAttributeNode(att)
        byte = oBinaryFile.byte()
        while byte != None:
            char = chr(byte)
            charUpper = char.upper()
            if charUpper >= 'A' and charUpper <= 'Z' or charUpper >= '0' and charUpper <= '9':
                # Alphanumeric: extend the current word.
                word += char
                wordExact.append(char)
            elif slash == '/' and char == '#':
                # Possible #xx hex escape inside a name.
                d1 = oBinaryFile.byte()
                if d1 != None:
                    d2 = oBinaryFile.byte()
                    if d2 != None and (chr(d1) >= '0' and chr(d1) <= '9' or chr(d1).upper() >= 'A' and chr(d1).upper() <= 'F') and (chr(d2) >= '0' and chr(d2) <= '9' or chr(d2).upper() >= 'A' and chr(d2).upper() <= 'F'):
                        word += chr(int(chr(d1) + chr(d2), 16))
                        wordExact.append(int(chr(d1) + chr(d2), 16))
                        hexcode = True
                        if oEntropy != None:
                            oEntropy.add(d1, insideStream)
                            oEntropy.add(d2, insideStream)
                        if oPDFEOF != None:
                            oPDFEOF.parse(d1)
                            oPDFEOF.parse(d2)
                    else:
                        # Not a valid escape: push both bytes back and end the word.
                        oBinaryFile.unget(d2)
                        oBinaryFile.unget(d1)
                        (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
                        if disarm:
                            fOut.write(C2BIP3(char))
                else:
                    oBinaryFile.unget(d1)
                    (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
                    if disarm:
                        fOut.write(C2BIP3(char))
            else:
                # Any other character terminates the current word.
                oCVE_2009_3459.Check(lastName, word)

                (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
                if char == '/':
                    slash = '/'
                else:
                    slash = ''
                if disarm:
                    fOut.write(C2BIP3(char))

            if oPDFDate != None and oPDFDate.parse(char) != None:
                dates.append([oPDFDate.date, lastName])

            if oEntropy != None:
                oEntropy.add(byte, insideStream)

            if oPDFEOF != None:
                oPDFEOF.parse(char)

            byte = oBinaryFile.byte()
        (word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)

        # check to see if file ended with %%EOF.  If so, we can reset charsAfterLastEOF and add one to EOF count.  This is never performed in
        # the parse function because it never gets called due to hitting the end of file.
        if byte == None and oPDFEOF != None:
            if oPDFEOF.token == '%%EOF':
                oPDFEOF.cntEOFs += 1
                oPDFEOF.cntCharsAfterLastEOF = 0
                oPDFEOF.token = ''

    except SystemExit:
        sys.exit()
    except Exception:
        # Set the values on the root element directly: this does not depend
        # on what XMLAddAttribute returned.
        xmlDoc.documentElement.setAttribute('ErrorOccured', 'True')
        xmlDoc.documentElement.setAttribute('ErrorMessage', traceback.format_exc())
    finally:
        # Covers the normal path, the early return and every error path.
        if fOut is not None:
            fOut.close()

    attEntropyAll = _AddRootAttribute(xmlDoc, 'TotalEntropy')
    attCountAll = _AddRootAttribute(xmlDoc, 'TotalCount')
    attEntropyStream = _AddRootAttribute(xmlDoc, 'StreamEntropy')
    attCountStream = _AddRootAttribute(xmlDoc, 'StreamCount')
    attEntropyNonStream = _AddRootAttribute(xmlDoc, 'NonStreamEntropy')
    attCountNonStream = _AddRootAttribute(xmlDoc, 'NonStreamCount')
    if oEntropy != None:
        (countAll, entropyAll, countStream, entropyStream, countNonStream, entropyNonStream) = oEntropy.calc()
        attEntropyAll.nodeValue = '%f' % entropyAll
        attCountAll.nodeValue = '%d' % countAll
        attEntropyStream.nodeValue = '%f' % entropyStream
        attCountStream.nodeValue = '%d' % countStream
        attEntropyNonStream.nodeValue = '%f' % entropyNonStream
        attCountNonStream.nodeValue = '%d' % countNonStream
    else:
        attEntropyAll.nodeValue = ''
        attCountAll.nodeValue = ''
        attEntropyStream.nodeValue = ''
        attCountStream.nodeValue = ''
        attEntropyNonStream.nodeValue = ''
        attCountNonStream.nodeValue = ''

    attCountEOF = _AddRootAttribute(xmlDoc, 'CountEOF')
    attCountCharsAfterLastEOF = _AddRootAttribute(xmlDoc, 'CountCharsAfterLastEOF')
    if oPDFEOF != None:
        attCountEOF.nodeValue = '%d' % oPDFEOF.cntEOFs
        # The "after last EOF" counter is only meaningful (and only
        # guaranteed to exist) once at least one %%EOF has been seen.
        if oPDFEOF.cntEOFs > 0:
            attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF
        else:
            attCountCharsAfterLastEOF.nodeValue = ''
    else:
        attCountEOF.nodeValue = ''
        attCountCharsAfterLastEOF.nodeValue = ''

    eleKeywords = xmlDoc.createElement('Keywords')
    xmlDoc.documentElement.appendChild(eleKeywords)
    for keyword in keywords:
        _AppendKeyword(xmlDoc, eleKeywords, keyword, words[keyword][0], words[keyword][1])
    _AppendKeyword(xmlDoc, eleKeywords, '/Colors > 2^24', oCVE_2009_3459.count, 0)
    if allNames:
        for name in sorted(words.keys()):
            if not name in keywords:
                _AppendKeyword(xmlDoc, eleKeywords, name, words[name][0], words[name][1])

    eleDates = xmlDoc.createElement('Dates')
    xmlDoc.documentElement.appendChild(eleDates)
    dates.sort(key=lambda x: x[0])
    for date in dates:
        eleDate = xmlDoc.createElement('Date')
        eleDates.appendChild(eleDate)
        att = xmlDoc.createAttribute('Value')
        att.nodeValue = date[0]
        eleDate.setAttributeNode(att)
        att = xmlDoc.createAttribute('Name')
        att.nodeValue = date[1]
        eleDate.setAttributeNode(att)
    return xmlDoc
def PDFiD2String(xmlDoc, force):
    """Render a PDFiD XML report as a plain-text report.

    xmlDoc -- DOM document; the attributes read here live on its root element,
              which must also contain a 'Keywords' and a 'Dates' element.
    force  -- when true, the keyword report is produced even if the root
              attribute IsPDF is 'False'.

    Returns the report as one string; every line ends with a newline.
    """
    root = xmlDoc.documentElement
    rootAttribute = root.getAttribute

    banner = 'PDFiD %s %s\n' % (rootAttribute('Version'), rootAttribute('Filename'))
    if rootAttribute('ErrorOccured') == 'True':
        return banner + '***Error occured***\n%s\n' % rootAttribute('ErrorMessage')
    if not force and rootAttribute('IsPDF') == 'False':
        return banner + ' Not a PDF document\n'

    parts = [banner, ' PDF Header: %s\n' % rootAttribute('Header')]

    # One line per keyword; the hexcode count is appended only when non-zero.
    for keywordNode in root.getElementsByTagName('Keywords')[0].childNodes:
        parts.append(' %-16s %7d' % (keywordNode.getAttribute('Name'), int(keywordNode.getAttribute('Count'))))
        hexcodeCount = int(keywordNode.getAttribute('HexcodeCount'))
        if hexcodeCount > 0:
            parts.append('(%d)' % hexcodeCount)
        parts.append('\n')

    # The labels are format *arguments*, so both percent signs of '%%EOF'
    # appear in the output.
    for attributeName, label in (('CountEOF', '%%EOF'), ('CountCharsAfterLastEOF', 'After last %%EOF')):
        attributeValue = rootAttribute(attributeName)
        if attributeValue != '':
            parts.append(' %-16s %7d\n' % (label, int(attributeValue)))

    for dateNode in root.getElementsByTagName('Dates')[0].childNodes:
        parts.append(' %-23s %s\n' % (dateNode.getAttribute('Value'), dateNode.getAttribute('Name')))

    # Entropy lines are emitted only when the corresponding attribute is set.
    entropyLines = (
        ('TotalEntropy', 'TotalCount', ' Total entropy: %s (%10s bytes)\n'),
        ('StreamEntropy', 'StreamCount', ' Entropy inside streams: %s (%10s bytes)\n'),
        ('NonStreamEntropy', 'NonStreamCount', ' Entropy outside streams: %s (%10s bytes)\n'),
    )
    for entropyName, countName, template in entropyLines:
        if rootAttribute(entropyName) != '':
            parts.append(template % (rootAttribute(entropyName), rootAttribute(countName)))

    return ''.join(parts)
class cCount():
    """Pair of counters recorded for one keyword.

    count   -- value stored as the keyword's occurrence count
    hexcode -- value stored as the keyword's hexcode count
    """

    def __init__(self, count, hexcode):
        self.count, self.hexcode = count, hexcode
class cPDFiD():
    """Object view of a PDFiD XML report.

    Always sets version, filename, errorOccured, errorMessage and isPDF.
    When an error was recorded, isPDF stays None and nothing else is set.
    When the document is not a PDF and force is false, only isPDF is added.
    Otherwise header, keywords (name -> cCount) and one shortcut attribute
    per well-known keyword are set as well.
    """

    def __init__(self, xmlDoc, force):
        root = xmlDoc.documentElement
        self.version = root.getAttribute('Version')
        self.filename = root.getAttribute('Filename')
        self.errorOccured = root.getAttribute('ErrorOccured') == 'True'
        self.errorMessage = root.getAttribute('ErrorMessage')
        self.isPDF = None
        if self.errorOccured:
            return

        self.isPDF = root.getAttribute('IsPDF') == 'True'
        if not (force or self.isPDF):
            return

        self.header = root.getAttribute('Header')
        self.keywords = {}
        for keywordNode in root.getElementsByTagName('Keywords')[0].childNodes:
            oCount = cCount(int(keywordNode.getAttribute('Count')), int(keywordNode.getAttribute('HexcodeCount')))
            self.keywords[keywordNode.getAttribute('Name')] = oCount

        # (attribute name, keyword name), assigned in this order; a missing
        # keyword raises KeyError exactly where the lookup happens.
        shortcuts = (
            ('obj', 'obj'),
            ('endobj', 'endobj'),
            ('stream', 'stream'),
            ('endstream', 'endstream'),
            ('xref', 'xref'),
            ('trailer', 'trailer'),
            ('startxref', 'startxref'),
            ('page', '/Page'),
            ('encrypt', '/Encrypt'),
            ('objstm', '/ObjStm'),
            ('js', '/JS'),
            ('javascript', '/JavaScript'),
            ('aa', '/AA'),
            ('openaction', '/OpenAction'),
            ('acroform', '/AcroForm'),
            ('jbig2decode', '/JBIG2Decode'),
            ('richmedia', '/RichMedia'),
            ('launch', '/Launch'),
            ('embeddedfile', '/EmbeddedFile'),
            ('xfa', '/XFA'),
            ('colors_gt_2_24', '/Colors > 2^24'),
        )
        for attributeName, keywordName in shortcuts:
            setattr(self, attributeName, self.keywords[keywordName])
def Print(lines, options):
    """Print text to stdout and, when logging is enabled, append it to a log file.

    lines   -- text to output; a newline is added when writing to the log file
    options -- object with the attributes 'scan' and 'output'

    The log file is 'PDFiD.log' when options.scan is set; a non-empty
    options.output takes precedence over that default. Without either,
    nothing is written to disk.
    """
    print(lines)
    filename = None
    if options.scan:
        filename = 'PDFiD.log'
    if options.output != '':
        filename = options.output
    if filename:
        # 'with' closes the handle even when write() raises (e.g. disk full).
        with open(filename, 'a') as logfile:
            logfile.write(lines + '\n')
def Quote(value, separator, quote):
    """Return value prepared for use as one CSV field.

    Non-string values are returned unchanged. A string is wrapped in the
    quote character when it contains the separator, the quote character
    itself, or a line break; embedded quote characters are doubled so the
    field can be parsed back unambiguously (RFC 4180 convention). Strings
    that need no quoting are returned unchanged.
    """
    if not isinstance(value, str):
        return value
    containsQuote = quote != '' and quote in value
    if not (separator in value or containsQuote or '\n' in value or '\r' in value):
        return value
    if containsQuote:
        # Without doubling, a quote inside the value would end the field early.
        value = value.replace(quote, quote + quote)
    return quote + value + quote
def MakeCSVLine(fields, separator=';', quote='"'):
    """Build one CSV line from (format, value) pairs.

    fields    -- sequence of pairs: a %-format specifier and the value for it
    separator -- string placed between the fields
    quote     -- quote character handed to Quote() for each value

    The format specifiers are joined with the separator into one template,
    which is then applied to the values after each has gone through Quote().
    """
    template = separator.join(field[0] for field in fields)
    values = tuple(Quote(field[1], separator, quote) for field in fields)
    return template % values
def ProcessFile(filename, options, plugins):
    """Analyse one file with PDFiD and print the result.

    filename -- passed to PDFiD() as the input to analyse
    options  -- parsed command-line options; reads all, extra, disarm, force,
                select, csv, verbose and minimumscore
    plugins  -- list of plugin classes; each is instantiated with the cPDFiD
                object and must provide 'name', 'onlyValidPDF' and Score()

    Without plugins and without a select expression the plain report is
    printed. With a select expression the file is reported only when the
    expression evaluates to a true value. Otherwise every plugin is run and
    results with a score of at least options.minimumscore are printed.
    Errors in the select expression or in a plugin are reported and end the
    processing of this file; they are re-raised only when options.verbose is
    set.
    """
    xmlDoc = PDFiD(filename, options.all, options.extra, options.disarm, options.force)
    if plugins == [] and options.select == '':
        Print(PDFiD2String(xmlDoc, options.force), options)
        return

    oPDFiD = cPDFiD(xmlDoc, options.force)
    if options.select:
        if options.force or not oPDFiD.errorOccured and oPDFiD.isPDF:
            # Not unused: 'pdf' is the name the select expression refers to.
            pdf = oPDFiD
            try:
                # SECURITY: eval() runs the expression with full interpreter
                # privileges. It comes from the command line; never pass
                # text from an untrusted source as the select expression.
                selected = eval(options.select)
            except Exception:
                Print('Error evaluating select expression: %s' % options.select, options)
                if options.verbose:
                    raise
                return
            if selected:
                if options.csv:
                    Print(filename, options)
                else:
                    Print(PDFiD2String(xmlDoc, options.force), options)
    else:
        for cPlugin in plugins:
            if not cPlugin.onlyValidPDF or not oPDFiD.errorOccured and oPDFiD.isPDF:
                try:
                    oPlugin = cPlugin(oPDFiD)
                except Exception:
                    Print('Error instantiating plugin: %s' % cPlugin.name, options)
                    if options.verbose:
                        raise
                    return

                try:
                    score = oPlugin.Score()
                except Exception:
                    Print('Error running plugin: %s' % cPlugin.name, options)
                    if options.verbose:
                        raise
                    return

                if options.csv:
                    if score >= options.minimumscore:
                        Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%.02f', score))), options)
                else:
                    if score >= options.minimumscore:
                        Print(PDFiD2String(xmlDoc, options.force), options)
                        Print('%s score: %.02f' % (cPlugin.name, score), options)
            else:
                if options.csv:
                    # isPDF is None when an error occurred, so these two cases
                    # must be exclusive; otherwise an errored file would get
                    # both an 'Error occured' and a 'Not a PDF document' row.
                    if oPDFiD.errorOccured:
                        Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Error occured'))), options)
                    elif not oPDFiD.isPDF:
                        Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Not a PDF document'))), options)
                else:
                    Print(PDFiD2String(xmlDoc, options.force), options)
def Scan(directory, options, plugins):
    """Process a path recursively.

    A directory is walked entry by entry, calling Scan() again for each
    entry; anything else is handed to ProcessFile(). Any Exception raised
    while handling the path is printed and not propagated.
    """
    try:
        if not os.path.isdir(directory):
            ProcessFile(directory, options, plugins)
            return
        for entryName in os.listdir(directory):
            Scan(os.path.join(directory, entryName), options, plugins)
    except Exception as error:
        # Best effort: report the problem and carry on with the caller's loop.
        print(error)
#function derived from: http://blog.9bplus.com/pdfidpy-output-to-json
def PDFiD2JSON(xmlDoc, force):
    """Serialise a PDFiD XML report to a JSON string.

    xmlDoc -- DOM document; attributes are read from its root element, which
              must contain a 'Keywords' and a 'Dates' element
    force  -- accepted but not used by this function

    Returns json.dumps() of a one-element list holding {'pdfid': data}.
    Root attributes are copied as strings; keyword counts are integers and a
    hexcode count that is not positive is reported as 0.
    """
    root = xmlDoc.documentElement

    # Top-level attributes
    errorOccured = root.getAttribute('ErrorOccured')
    errorMessage = root.getAttribute('ErrorMessage')
    filename = root.getAttribute('Filename')
    header = root.getAttribute('Header')
    isPdf = root.getAttribute('IsPDF')
    version = root.getAttribute('Version')
    entropy = root.getAttribute('Entropy')

    # Extra data
    countEof = root.getAttribute('CountEOF')
    countChatAfterLastEof = root.getAttribute('CountCharsAfterLastEOF')
    totalEntropy = root.getAttribute('TotalEntropy')
    streamEntropy = root.getAttribute('StreamEntropy')
    nonStreamEntropy = root.getAttribute('NonStreamEntropy')

    # One entry per keyword element
    keywords = []
    for keywordNode in root.getElementsByTagName('Keywords')[0].childNodes:
        keywordName = keywordNode.getAttribute('Name')
        keywordCount = int(keywordNode.getAttribute('Count'))
        hexCount = max(int(keywordNode.getAttribute('HexcodeCount')), 0)
        keywords.append({ 'count':keywordCount, 'hexcodecount':hexCount, 'name':keywordName })

    # One entry per date element
    dates = []
    for dateNode in root.getElementsByTagName('Dates')[0].childNodes:
        dateName = dateNode.getAttribute('Name')
        dateValue = dateNode.getAttribute('Value')
        dates.append({ 'name':dateName, 'value':dateValue })

    # Key order is kept as is so the serialised text does not change.
    data = {
        'countEof':countEof,
        'countChatAfterLastEof':countChatAfterLastEof,
        'totalEntropy':totalEntropy,
        'streamEntropy':streamEntropy,
        'nonStreamEntropy':nonStreamEntropy,
        'errorOccured':errorOccured,
        'errorMessage':errorMessage,
        'filename':filename,
        'header':header,
        'isPdf':isPdf,
        'version':version,
        'entropy':entropy,
        'keywords': { 'keyword': keywords },
        'dates': { 'date':dates},
    }
    return json.dumps([ { 'pdfid' : data} ])
def File2Strings(filename):
    """Read a text file and return its lines without the trailing newline.

    Returns a list with one string per line, or None when the file cannot be
    opened or read.
    """
    try:
        # 'with' guarantees the handle is closed on every path.
        with open(filename, 'r') as f:
            return [line.rstrip('\n') for line in f.readlines()]
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are no longer swallowed and turned into None.
        return None
def ProcessAt(argument):
    """Expand an '@file' argument into the lines of that file.

    An argument that does not start with '@' is returned as a one-element
    list. Otherwise the rest of the argument is read with File2Strings() and
    its lines are returned; an Exception is raised when that read fails.
    """
    if not argument.startswith('@'):
        return [argument]
    fileLines = File2Strings(argument[1:])
    if fileLines is None:
        raise Exception('Error reading %s' % argument)
    return fileLines
def AddPlugin(cClass):
    """Append a plugin class to the module-level 'plugins' list."""
    global plugins
    registry = plugins
    registry.append(cClass)
def ExpandFilenameArguments(filenames):
    """Expand '@file' references and wildcards in a list of arguments.

    Each argument goes through ProcessAt(), every resulting pattern through
    glob.glob(). The matches are returned in order of first appearance with
    duplicates removed.
    """
    uniqueMatches = collections.OrderedDict()
    for argument in filenames:
        for pattern in ProcessAt(argument):
            for match in glob.glob(pattern):
                uniqueMatches[match] = None
    return list(uniqueMatches)
class cPluginParent():
    """Base class for the PDFiD plugins."""

    # Default read by ProcessFile(): a plugin that leaves this True is only
    # run when no error occurred and the input is a PDF.
    onlyValidPDF = True
def LoadPlugins(plugins, verbose):
    """Load and execute the plugin files named in a comma-separated string.

    plugins -- comma-separated plugin names; each item may be an '@file'
               reference, which is expanded by ProcessAt(). An empty string
               loads nothing.
    verbose -- when true, an exception raised while loading a plugin is
               re-raised after the error message has been printed.

    '.py' is appended to names that lack it. A name without a directory part
    that does not exist in the current directory is looked up next to the
    running script (the directory of sys.argv[0]).

    SECURITY: the plugin source is run with exec(), i.e. with the full
    privileges of this process. Only load plugin files you trust.
    """
    if plugins == '':
        return
    scriptPath = os.path.dirname(sys.argv[0])
    for plugin in sum(map(ProcessAt, plugins.split(',')), []):
        try:
            if not plugin.lower().endswith('.py'):
                plugin += '.py'
            if os.path.dirname(plugin) == '':
                if not os.path.exists(plugin):
                    scriptPlugin = os.path.join(scriptPath, plugin)
                    if os.path.exists(scriptPlugin):
                        plugin = scriptPlugin
            # Read under 'with' so the file handle is closed right away
            # instead of being left to the garbage collector.
            with open(plugin, 'r') as pluginFile:
                pluginSource = pluginFile.read()
            exec(pluginSource)
        except Exception:
            print('Error loading plugin: %s' % plugin)
            if verbose:
                # Bare raise keeps the original traceback.
                raise
def PDFiDMain(filenames, options):
    """Load the requested plugins and process every given input.

    filenames -- iterable of inputs; each one is passed to Scan() when
                 options.scan is set and to ProcessFile() otherwise
    options   -- parsed command-line options

    Resets the module-level 'plugins' list before loading. In CSV mode a
    header line is printed first: the three-column plugin header when at
    least one plugin was loaded, else 'Filename' when a select expression
    was given.
    """
    global plugins
    plugins = []
    LoadPlugins(options.plugins, options.verbose)

    if options.csv and plugins != []:
        Print(MakeCSVLine((('%s', 'Filename'), ('%s', 'Plugin-name'), ('%s', 'Score'))), options)
    elif options.csv and options.select != '':
        Print('Filename', options)

    for filename in filenames:
        handler = Scan if options.scan else ProcessFile
        handler(filename, options, plugins)
def Main():
    """Command-line entry point: parse the options and run PDFiDMain().

    Without positional arguments a single empty filename is processed and
    the options 'disarm' and 'scan' are switched off with a notice.
    Otherwise the arguments are expanded with ExpandFilenameArguments(); an
    exception raised by that expansion is printed and ends the run.
    """
    moredesc = '''
Arguments:
pdf-file and zip-file can be a single file, several files, and/or @file
@file: run PDFiD on each file listed in the text file specified
wildcards are supported
Source code put in the public domain by Didier Stevens, no Copyright
Use at your own risk
https://DidierStevens.com'''

    usage = 'usage: %prog [options] [pdf-file|zip-file|url|@file] ...\n' + __description__ + moredesc
    oParser = optparse.OptionParser(usage=usage, version='%prog ' + __version__)

    # Registered in this order, which is also the order of the help output.
    optionTable = (
        ('-s', '--scan', dict(action='store_true', default=False, help='scan the given directory')),
        ('-a', '--all', dict(action='store_true', default=False, help='display all the names')),
        ('-e', '--extra', dict(action='store_true', default=False, help='display extra data, like dates')),
        ('-f', '--force', dict(action='store_true', default=False, help='force the scan of the file, even without proper %PDF header')),
        ('-d', '--disarm', dict(action='store_true', default=False, help='disable JavaScript and auto launch')),
        ('-p', '--plugins', dict(type=str, default='', help='plugins to load (separate plugins with a comma , ; @file supported)')),
        ('-c', '--csv', dict(action='store_true', default=False, help='output csv data when using plugins')),
        ('-m', '--minimumscore', dict(type=float, default=0.0, help='minimum score for plugin results output')),
        ('-v', '--verbose', dict(action='store_true', default=False, help='verbose (will also raise catched exceptions)')),
        ('-S', '--select', dict(type=str, default='', help='selection expression')),
        ('-o', '--output', dict(type=str, default='', help='output to log file')),
    )
    for shortName, longName, settings in optionTable:
        oParser.add_option(shortName, longName, **settings)

    options, args = oParser.parse_args()

    if len(args) != 0:
        try:
            filenames = ExpandFilenameArguments(args)
        except Exception as e:
            print(e)
            return
    else:
        # No input arguments: these two options are switched off.
        if options.disarm:
            print('Option disarm not supported with stdin')
            options.disarm = False
        if options.scan:
            print('Option scan not supported with stdin')
            options.scan = False
        filenames = ['']

    PDFiDMain(filenames, options)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    Main()

View File

@ -0,0 +1,21 @@
#!/usr/bin/env python
#2014/10/13
class cPDFiDEmbeddedFile(cPluginParent):
    """Plugin that scores a document on its '/EmbeddedFile' keyword."""
#    onlyValidPDF = True
    name = 'EmbeddedFile plugin'

    def __init__(self, oPDFiD):
        self.oPDFiD = oPDFiD

    def Score(self):
        """Return 0.0 without a counted '/EmbeddedFile' keyword, 1.0 when the
        keyword has a positive hexcode count, and 0.9 otherwise."""
        keywords = self.oPDFiD.keywords
        if not ('/EmbeddedFile' in keywords and keywords['/EmbeddedFile'].count > 0):
            return 0.0
        if keywords['/EmbeddedFile'].hexcode > 0:
            return 1.0
        return 0.9

# Register the plugin with PDFiD.
AddPlugin(cPDFiDEmbeddedFile)

View File

@ -0,0 +1,3 @@
plugin_embeddedfile.py
plugin_nameobfuscation.py
plugin_triage.py

View File

@ -0,0 +1,19 @@
#!/usr/bin/env python
#2013/11/04
#2013/11/08
class cPDFiDNameObfuscation(cPluginParent):
    """Plugin that scores a document on the hexcode counts of its keywords."""
#    onlyValidPDF = True
    name = 'Name Obfuscation plugin'

    def __init__(self, oPDFiD):
        self.oPDFiD = oPDFiD

    def Score(self):
        """Return 1.0 when the hexcode counts of all keywords sum to more
        than zero, else 0.0."""
        totalHexcode = sum(oCount.hexcode for oCount in self.oPDFiD.keywords.values())
        return 1.0 if totalHexcode > 0 else 0.0

# Register the plugin with PDFiD.
AddPlugin(cPDFiDNameObfuscation)

View File

@ -0,0 +1,22 @@
#!/usr/bin/env python
#2014/09/30
class cPDFiDTriage(cPluginParent):
    """Plugin that returns 1.0 for documents needing a closer look."""
#    onlyValidPDF = True
    name = 'Triage plugin'

    def __init__(self, oPDFiD):
        self.oPDFiD = oPDFiD

    def Score(self):
        """Return 1.0 when any watched keyword has a positive count, or when
        the obj/endobj or stream/endstream counts differ; else 0.0."""
        keywords = self.oPDFiD.keywords
        watched = ('/ObjStm', '/JS', '/JavaScript', '/AA', '/OpenAction', '/AcroForm', '/JBIG2Decode', '/RichMedia', '/Launch', '/EmbeddedFile', '/XFA', '/Colors > 2^24')
        if any(keyword in keywords and keywords[keyword].count > 0 for keyword in watched):
            return 1.0
        # Opening and closing markers are expected to be balanced.
        for opener, closer in (('obj', 'endobj'), ('stream', 'endstream')):
            if keywords[opener].count != keywords[closer].count:
                return 1.0
        return 0.0

# Register the plugin with PDFiD.
AddPlugin(cPDFiDTriage)

View File

@ -24,14 +24,14 @@ set -e
set -x
# If you use a partition...
#PARTITION_ROOTFS='/dev/mmcblk0p2'
#PARTITION_BOOT='/dev/mmcblk0p1'
PARTITION_ROOTFS='/dev/sdd2'
PARTITION_BOOT='/dev/sdd1'
PARTITION_ROOTFS='/dev/mmcblk0p2'
PARTITION_BOOT='/dev/mmcblk0p1'
#PARTITION_ROOTFS='/dev/sdd2'
#PARTITION_BOOT='/dev/sdd1'
# If you use the img
##### Debian
IMAGE='2015-02-16-raspbian-wheezy.img'
IMAGE='2015-11-06-CIRCLean.img'
OFFSET_ROOTFS=$((122880 * 512))
OFFSET_BOOT=$((8192 * 512))
##### Arch

View File

@ -2,7 +2,7 @@
set timeout -1
spawn qemu-system-arm -kernel 140801-kernel -cpu arm1176 -m 256 -M versatilepb \
spawn qemu-system-arm -kernel kernel-qemu -cpu arm1176 -m 256 -M versatilepb \
-append "root=/dev/sdc2 panic=1 rootfstype=ext4 ro console=ttyAMA0 console=ttyS0" \
-drive file=[lindex $argv 1],index=0,media=disk \
-drive file=[lindex $argv 2],index=1,media=disk \

View File

@ -2,7 +2,7 @@
# http://pub.phyks.me/respawn/mypersonaldata/public/2014-05-20-11-08-01/
IMAGE='../2015-02-16-raspbian-wheezy.img'
IMAGE='../raspbian-wheezy.img'
OFFSET_ROOTFS=$((122880 * 512))
IMAGE_VFAT_NORM="testcase.vfat"