mirror of https://github.com/CIRCL/Circlean
commit
7cd38ee32f
26 changed files with 1525 additions and 22 deletions
@ -0,0 +1,86 @@ |
||||
Install Qemu and Expect |
||||
============ |
||||
|
||||
Install the necessary packages: |
||||
|
||||
``` |
||||
sudo apt-get install qemu qemu-user-static expect |
||||
``` |
||||
|
||||
Create a new image from scratch |
||||
=============================== |
||||
|
||||
* Download the most recent Raspbian version: |
||||
http://downloads.raspberrypi.org/raspbian_latest |
||||
|
||||
* Unpack it: |
||||
|
||||
``` |
||||
unzip 2015-05-05-raspbian-wheezy.zip |
||||
mv 2015-05-05-raspbian-wheezy.zip raspbian-wheezy.zip |
||||
``` |
||||
|
||||
Prepare the image |
||||
================= |
||||
|
||||
It will be used for the build environment and the final image. |
||||
|
||||
* [Add empty space to the image](resize_img.md) |
||||
|
||||
* Chroot in the image |
||||
|
||||
``` |
||||
sudo ./proper_chroot.sh |
||||
``` |
||||
|
||||
* Change your user to root (your global variables may be broken) |
||||
|
||||
``` |
||||
su root |
||||
``` |
||||
|
||||
* The locales may be broken; fix them (remove `en_GB.UTF-8 UTF-8`, set `en_US.UTF-8 UTF-8`): |
||||
|
||||
``` |
||||
dpkg-reconfigure locales |
||||
``` |
||||
|
||||
* In the image, make sure everything is up-to-date, and remove the old packages |
||||
|
||||
``` |
||||
apt-get update |
||||
apt-get dist-upgrade |
||||
apt-get autoremove |
||||
apt-get install p7zip-full python-dev libxml2-dev libxslt1-dev pmount |
||||
``` |
||||
|
||||
* Install python requirements |
||||
|
||||
``` |
||||
pip install lxml |
||||
pip install oletools olefile |
||||
pip install officedissector |
||||
pip install git+https://github.com/Rafiot/python-magic.git@travis |
||||
pip install git+https://github.com/CIRCL/PyCIRCLean.git |
||||
``` |
||||
|
||||
* Create the user and mtab for a RO filesystem |
||||
|
||||
``` |
||||
useradd -m kitten |
||||
chown -R kitten:kitten /home/kitten |
||||
ln -s /proc/mounts /etc/mtab |
||||
``` |
||||
|
||||
* Copy the files |
||||
|
||||
``` |
||||
sudo ./copy_to_final.sh /mnt/arm_rPi/ |
||||
``` |
||||
|
||||
* Enable rc.local |
||||
|
||||
``` |
||||
systemctl enable rc-local.service |
||||
``` |
||||
|
@ -0,0 +1 @@ |
||||
SUBSYSTEM=="hidraw", DRIVERS=="usbhid", RUN+="/bin/bash -c 'cd /sys/devices/platform/bcm2708_usb/usb1/1-1 ; for d in $(ls -d 1-1.[2-5]); do if [ $(ls -lR $d | grep -c usbhid) -gt 0 ] ; then echo 0 > $d/authorized ; fi ; done'" |
@ -0,0 +1,6 @@ |
||||
proc /proc proc defaults 0 0 |
||||
/dev/mmcblk0p1 /boot vfat ro,defaults 0 0 |
||||
/dev/mmcblk0p2 / ext4 ro,defaults,noatime 0 0 |
||||
tmpfs /tmp tmpfs rw,size=64M,noexec,nodev,nosuid,mode=1777 0 0 |
||||
tmpfs /media tmpfs rw,size=64M,noexec,nodev,nosuid,mode=1777 0 0 |
||||
# a swapfile is not a swap partition, so no using swapon|off from here on, use dphys-swapfile swap[on|off] for that |
@ -0,0 +1,54 @@ |
||||
root:x:0: |
||||
daemon:x:1: |
||||
bin:x:2: |
||||
sys:x:3: |
||||
adm:x:4:pi |
||||
tty:x:5: |
||||
disk:x:6: |
||||
lp:x:7: |
||||
mail:x:8: |
||||
news:x:9: |
||||
uucp:x:10: |
||||
man:x:12: |
||||
proxy:x:13: |
||||
kmem:x:15: |
||||
dialout:x:20:pi |
||||
fax:x:21: |
||||
voice:x:22: |
||||
cdrom:x:24:pi |
||||
floppy:x:25: |
||||
tape:x:26: |
||||
sudo:x:27:pi |
||||
audio:x:29:pi |
||||
dip:x:30: |
||||
www-data:x:33: |
||||
backup:x:34: |
||||
operator:x:37: |
||||
list:x:38: |
||||
irc:x:39: |
||||
src:x:40: |
||||
gnats:x:41: |
||||
shadow:x:42: |
||||
utmp:x:43: |
||||
video:x:44:pi |
||||
sasl:x:45: |
||||
plugdev:x:46:pi,kitten |
||||
staff:x:50: |
||||
games:x:60:pi |
||||
users:x:100:pi |
||||
nogroup:x:65534: |
||||
libuuid:x:101: |
||||
crontab:x:102: |
||||
pi:x:1000: |
||||
ssh:x:103: |
||||
ntp:x:104: |
||||
netdev:x:105:pi |
||||
input:x:999:pi |
||||
messagebus:x:106: |
||||
lpadmin:x:107: |
||||
fuse:x:108: |
||||
lightdm:x:109: |
||||
indiecity:x:1001:root |
||||
spi:x:1002:pi |
||||
gpio:x:1003:pi |
||||
kitten:x:1004: |
@ -0,0 +1,61 @@ |
||||
# |
||||
# The PAM configuration file for the Shadow `su' service |
||||
# |
||||
|
||||
# This allows root to su without passwords (normal operation) |
||||
auth sufficient pam_rootok.so |
||||
|
||||
# Uncomment this to force users to be a member of group root |
||||
# before they can use `su'. You can also add "group=foo" |
||||
# to the end of this line if you want to use a group other |
||||
# than the default "root" (but this may have side effect of |
||||
# denying "root" user, unless she's a member of "foo" or explicitly |
||||
# permitted earlier by e.g. "sufficient pam_rootok.so"). |
||||
# (Replaces the `SU_WHEEL_ONLY' option from login.defs) |
||||
# auth required pam_wheel.so |
||||
|
||||
# Uncomment this if you want wheel members to be able to |
||||
# su without a password. |
||||
# auth sufficient pam_wheel.so trust |
||||
|
||||
# Uncomment this if you want members of a specific group to not |
||||
# be allowed to use su at all. |
||||
# auth required pam_wheel.so deny group=nosu |
||||
|
||||
# Uncomment and edit /etc/security/time.conf if you need to set |
||||
# time restraints on su usage.
||||
# (Replaces the `PORTTIME_CHECKS_ENAB' option from login.defs |
||||
# as well as /etc/porttime) |
||||
# account requisite pam_time.so |
||||
|
||||
# This module parses environment configuration file(s) |
||||
# and also allows you to use an extended config |
||||
# file /etc/security/pam_env.conf. |
||||
# |
||||
# parsing /etc/environment needs "readenv=1" |
||||
session required pam_env.so readenv=1 |
||||
# locale variables are also kept into /etc/default/locale in etch |
||||
# reading this file *in addition to /etc/environment* does not hurt |
||||
session required pam_env.so readenv=1 envfile=/etc/default/locale |
||||
|
||||
# Defines the MAIL environment variable |
||||
# However, userdel also needs MAIL_DIR and MAIL_FILE variables |
||||
# in /etc/login.defs to make sure that removing a user |
||||
# also removes the user's mail spool file. |
||||
# See comments in /etc/login.defs |
||||
# |
||||
# "nopen" stands to avoid reporting new mail when su'ing to another user |
||||
session optional pam_mail.so nopen |
||||
|
||||
# Sets up user limits according to /etc/security/limits.conf |
||||
# (Replaces the use of /etc/limits in old login) |
||||
#session required pam_limits.so |
||||
|
||||
# The standard Unix authentication modules, used with |
||||
# NIS (man nsswitch) as well as normal /etc/passwd and |
||||
# /etc/shadow entries. |
||||
@include common-auth |
||||
@include common-account |
||||
@include common-session |
||||
|
||||
|
@ -0,0 +1,30 @@ |
||||
root:x:0:0:root:/root:/bin/bash |
||||
daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin |
||||
bin:x:2:2:bin:/bin:/usr/sbin/nologin |
||||
sys:x:3:3:sys:/dev:/usr/sbin/nologin |
||||
sync:x:4:65534:sync:/bin:/bin/sync |
||||
games:x:5:60:games:/usr/games:/usr/sbin/nologin |
||||
man:x:6:12:man:/var/cache/man:/usr/sbin/nologin |
||||
lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin |
||||
mail:x:8:8:mail:/var/mail:/usr/sbin/nologin |
||||
news:x:9:9:news:/var/spool/news:/usr/sbin/nologin |
||||
uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin |
||||
proxy:x:13:13:proxy:/bin:/usr/sbin/nologin |
||||
www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin |
||||
backup:x:34:34:backup:/var/backups:/usr/sbin/nologin |
||||
list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin |
||||
irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin |
||||
gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin |
||||
nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin |
||||
systemd-timesync:x:100:103:systemd Time Synchronization,,,:/run/systemd:/bin/false |
||||
systemd-network:x:101:104:systemd Network Management,,,:/run/systemd/netif:/bin/false |
||||
systemd-resolve:x:102:105:systemd Resolver,,,:/run/systemd/resolve:/bin/false |
||||
systemd-bus-proxy:x:103:106:systemd Bus Proxy,,,:/run/systemd:/bin/false |
||||
pi:x:1000:1000:,,,:/home/pi:/bin/false |
||||
sshd:x:104:65534::/var/run/sshd:/usr/sbin/nologin |
||||
messagebus:x:105:110::/var/run/dbus:/bin/false |
||||
avahi:x:106:111:Avahi mDNS daemon,,,:/var/run/avahi-daemon:/bin/false |
||||
ntp:x:107:112::/home/ntp:/bin/false |
||||
statd:x:108:65534::/var/lib/nfs:/bin/false |
||||
lightdm:x:109:114:Light Display Manager:/var/lib/lightdm:/bin/false |
||||
kitten:x:1001:1004::/home/kitten:/bin/bash |
@ -0,0 +1,5 @@ |
||||
# /etc/pmount.allow |
||||
# pmount will allow users to additionally mount all devices that are |
||||
# listed here. |
||||
/dev/sdb1 |
||||
/dev/sda* |
@ -0,0 +1,19 @@ |
||||
#!/bin/sh
# Part of raspi-config http://github.com/asb/raspi-config
#
# See LICENSE file for copyright and license details

# Meant to be installed as /etc/profile.d/raspi-config.sh so that it runs at
# initial login.

# Automatic login on tty1 can additionally be set in /etc/inittab with a line
# such as:
# 1:2345:respawn:/bin/login -f root tty1 </dev/tty1 >/dev/tty1 2>&1 # RPICFG_TO_DISABLE

if [ $(id -u) -ne 0 ]; then
    # Non-root logins only get a reminder.
    printf '\n%s\n\n' "NOTICE: the software on this Raspberry Pi has not been fully configured. Please run 'sudo raspi-config'"
else
    # raspi-config is deliberately not started at the first run (the call
    # below stays disabled); a root login is replaced by a login as pi.
    # raspi-config
    exec login -f pi
fi
@ -0,0 +1,36 @@ |
||||
#!/bin/sh -e
#
# rc.local
#
# Executed at the end of each multiuser runlevel. The script has to
# "exit 0" on success and any other value on error.
#
# Enabling or disabling it is done by changing the execution bits.
#
# When both /dev/sda and /dev/sdb exist, the network interface is brought
# down and the groomer in /opt/groomer is started; the machine is powered
# off once this script ends.

# Trap handler: announce the end of the run and power the machine off.
clean() {
    echo 'Rc Local done, quit.'
    /sbin/shutdown -P -h now
}

# Print the IP address (a failing hostname call must not abort under -e)
_IP=$(hostname -I) || true
if [ -n "$_IP" ]; then
    printf "My IP address is %s\n" "$_IP"
fi

# Both device nodes have to be present before anything is started.
if [ -e /dev/sda ] && [ -e /dev/sdb ]; then
    # avoid possible misuse
    /sbin/ifconfig eth0 down
    trap clean EXIT TERM INT
    cd /opt/groomer
    /usr/sbin/led &
    ./init.sh
fi

exit 0
@ -0,0 +1 @@ |
||||
kitten hard priority -20 |
@ -0,0 +1,28 @@ |
||||
# |
||||
# This file MUST be edited with the 'visudo' command as root. |
||||
# |
||||
# Please consider adding local content in /etc/sudoers.d/ instead of |
||||
# directly modifying this file. |
||||
# |
||||
# See the man page for details on how to write a sudoers file. |
||||
# |
||||
Defaults env_reset |
||||
Defaults mail_badpass |
||||
Defaults secure_path="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" |
||||
|
||||
# Host alias specification |
||||
|
||||
# User alias specification |
||||
|
||||
# Cmnd alias specification |
||||
|
||||
# User privilege specification |
||||
#root ALL=(ALL:ALL) ALL |
||||
|
||||
# Allow members of group sudo to execute any command |
||||
#%sudo ALL=(ALL:ALL) ALL |
||||
|
||||
# See sudoers(5) for more information on "#include" directives: |
||||
|
||||
#includedir /etc/sudoers.d |
||||
#pi ALL=(ALL) NOPASSWD: ALL |
@ -0,0 +1,12 @@ |
||||
[Unit] |
||||
Description=/etc/rc.local Compatibility |
||||
|
||||
[Service] |
||||
Type=oneshot |
||||
ExecStart=/etc/rc.local |
||||
TimeoutSec=0 |
||||
StandardInput=tty |
||||
RemainAfterExit=yes |
||||
|
||||
[Install] |
||||
WantedBy=multi-user.target |
@ -0,0 +1 @@ |
||||
SUBSYSTEM=="hidraw", DRIVERS=="usbhid", RUN+="/bin/bash -c 'cd /sys/devices/platform/bcm2708_usb/usb1/1-1 ; for d in $(ls -d 1-1.[2-5]); do if [ $(ls -lR $d | grep -c usbhid) -gt 0 ] ; then echo 0 > $d/authorized ; fi ; done'" |
@ -0,0 +1,2 @@ |
||||
KERNEL=="sdc", SYMLINK+="mmcblk0" |
||||
KERNEL=="sdc?", SYMLINK+="mmcblk0p%n", |
@ -0,0 +1,23 @@ |
||||
# Settings shared by the groomer scripts; this file is sourced, not executed.

# Source device (full path) and destination partition (name below /dev)
DEV_SRC="/dev/sda"
DEV_DST="sdb1"

# User allowed to do the following commands without password
USERNAME="kitten"
# Directory prefix the music files are globbed from
MUSIC="/opt/midi/"

# Numeric uid of whoever sources this file
ID=$(/usr/bin/id -u)

# Paths used in multiple scripts
SRC="src"
DST="dst"
TEMP="/media/${DST}/temp"
ZIPTEMP="/media/${DST}/ziptemp"
LOGS="/media/${DST}/logs"


# commands
SYNC="/bin/sync"
TIMIDITY="/usr/bin/timidity"
MOUNT="/bin/mount"
PMOUNT="/usr/bin/pmount -A -s"
PUMOUNT="/usr/bin/pumount"
@ -0,0 +1,112 @@ |
||||
#!/bin/bash

# Copies every partition of the source device (${DEV_SRC}) to the destination
# device (/dev/${DEV_DST}) through filecheck.py. Device names, mount point
# names and command paths are taken from constraint.sh.

set -e
set -x

source ./constraint.sh
# ID is the uid computed by constraint.sh; uids below 1000 are refused.
if ! [ "${ID}" -ge "1000" ]; then
    echo "This script cannot run as root."
    exit
fi

# Trap handler: flush buffers, unmount both devices and drop the temporary
# directories on the destination.
clean(){
    echo Cleaning.
    ${SYNC}

    # Cleanup source
    # errexit is still in force inside the handler: without "|| true" a
    # source that is not mounted would stop the cleanup before the
    # destination is handled.
    ${PUMOUNT} ${SRC} || true

    # Cleanup destination
    rm -rf ${TEMP}
    rm -rf ${ZIPTEMP}
    ${PUMOUNT} ${DST} || true

    exit
}

trap clean EXIT TERM INT

# Do we have a source device
if [ ! -b ${DEV_SRC} ]; then
    echo "Source device (${DEV_SRC}) does not exists."
    exit
fi
# Find the partition names on the source device
DEV_PARTITIONS=`ls "${DEV_SRC}"* | grep "${DEV_SRC}[1-9][0-6]*" || true`
if [ -z "${DEV_PARTITIONS}" ]; then
    echo "${DEV_SRC} does not have any partitions."
    exit
fi

# Do we have a destination device
if [ ! -b "/dev/${DEV_DST}" ]; then
    echo "Destination device (/dev/${DEV_DST}) does not exists."
    exit
fi

# mount and prepare destination device
if ${MOUNT}|grep ${DST}; then
    ${PUMOUNT} ${DST} || true
fi
# uid= only works on a vfat FS. What should we do if we get an ext* FS ?
# The mount is tested directly in the "if": with "set -e" a separate
# check of $? afterwards is never reached when the mount fails.
if ! ${PMOUNT} -w ${DEV_DST} ${DST}; then
    echo "Unable to mount /dev/${DEV_DST} on /media/${DST}"
    exit
fi
echo "Target USB device (/dev/${DEV_DST}) mounted at /media/${DST}"
rm -rf "/media/${DST}/FROM_PARTITION_"*

# prepare temp dirs and make sure they are empty
mkdir -p "${TEMP}"
mkdir -p "${ZIPTEMP}"
mkdir -p "${LOGS}"

rm -rf "${TEMP}/"*
rm -rf "${ZIPTEMP}/"*
rm -rf "${LOGS}/"*

# Groom da kitteh!

# Find the FS types
# lsblk -n -o name,fstype,mountpoint,label,uuid -r

PARTCOUNT=1
for partition in ${DEV_PARTITIONS}
do
    # Processing a partition
    echo "Processing partition: ${partition}"
    if [ `${MOUNT} | grep -c ${SRC}` -ne 0 ]; then
        ${PUMOUNT} ${SRC}
    fi

    # First pass, read-write: rename any autorun.inf so it is flagged.
    ${PMOUNT} -w ${partition} ${SRC}
    ls "/media/${SRC}" | grep -i autorun.inf | xargs -I {} mv "/media/${SRC}"/{} "/media/${SRC}"/DANGEROUS_{}_DANGEROUS || true
    ${PUMOUNT} ${SRC}
    # Second pass, read-only. Tested inside the "if" so that a failure
    # skips this partition instead of aborting the whole run under "set -e".
    if ! ${PMOUNT} -r ${partition} ${SRC}; then
        echo "Unable to mount ${partition} on /media/${SRC}"
    else
        echo "${partition} mounted at /media/${SRC}"

        # Print the filenames on the current partition in a logfile
        find "/media/${SRC}" -fls "${LOGS}/Content_partition_${PARTCOUNT}.txt"

        # create a directory on ${DST} named FROM_PARTITION_${PARTCOUNT}
        target_dir="/media/${DST}/FROM_PARTITION_${PARTCOUNT}"
        echo "copying to: ${target_dir}"
        mkdir -p "${target_dir}"
        LOGFILE="${LOGS}/processing.txt"

        echo "==== Starting processing of /media/${SRC} to ${target_dir}. ====" >> ${LOGFILE}
        filecheck.py --source /media/${SRC} --destination ${target_dir} || true
        echo "==== Done with /media/${SRC} to ${target_dir}. ====" >> ${LOGFILE}

        ls -lR "${target_dir}"
    fi
    PARTCOUNT=$((PARTCOUNT + 1))
done

# The cleanup is automatically done in the function clean called when
# the program quits
@ -0,0 +1,26 @@ |
||||
#!/bin/bash

# Root-side launcher: starts the background music, then runs groomer.sh as
# the unprivileged user named in constraint.sh.

set -ex

source ./constraint.sh

# ID is the uid computed by constraint.sh; only uid 0 may continue.
if [ "${ID}" -ne 0 ]; then
    echo "This script has to be run as root."
    exit
fi

# Trap handler: flush buffers, kill the music player started below and
# remove its pid file.
clean() {
    echo Done, cleaning.
    ${SYNC}
    kill -9 $(cat /tmp/music.pid)
    rm -f /tmp/music.pid
}

trap clean EXIT TERM INT

# Keep the pid of the background player so clean() can kill it.
./music.sh &
echo "$!" > /tmp/music.pid

su ${USERNAME} -c ./groomer.sh
||||
|
@ -0,0 +1,21 @@ |
||||
#!/bin/bash

# Plays randomly chosen files matching ${MUSIC}* in an endless loop, using
# the player configured in constraint.sh.

set -e
#set -x

source ./constraint.sh

# Trap handler: only reports that playback ended.
killed() {
    echo 'Music stopped.'
}

trap killed EXIT TERM INT

# Force output on analog
amixer cset numid=3 1

# Every path matching the MUSIC prefix
tracks=(${MUSIC}*)

while :; do
    pick=$((RANDOM % ${#tracks[@]}))
    $TIMIDITY ${tracks[pick]}
done
@ -0,0 +1,930 @@ |
||||
#!/usr/bin/env python |
||||
|
||||
__description__ = 'Tool to test a PDF file' |
||||
__author__ = 'Didier Stevens' |
||||
__version__ = '0.2.1' |
||||
__date__ = '2014/10/18' |
||||
|
||||
""" |
||||
|
||||
Tool to test a PDF file |
||||
|
||||
Source code put in public domain by Didier Stevens, no Copyright |
||||
https://DidierStevens.com |
||||
Use at your own risk |
||||
|
||||
History: |
||||
2009/03/27: start |
||||
2009/03/28: scan option |
||||
2009/03/29: V0.0.2: xml output |
||||
2009/03/31: V0.0.3: /ObjStm suggested by Dion |
||||
2009/04/02: V0.0.4: added ErrorMessage |
||||
2009/04/20: V0.0.5: added Dates |
||||
2009/04/21: V0.0.6: added entropy |
||||
2009/04/22: added disarm |
||||
2009/04/29: finished disarm |
||||
2009/05/13: V0.0.7: added cPDFEOF |
||||
2009/07/24: V0.0.8: added /AcroForm and /RichMedia, simplified %PDF header regex, extra date format (without TZ) |
||||
2009/07/25: added input redirection, option --force |
||||
2009/10/13: V0.0.9: added detection for CVE-2009-3459; added /RichMedia to disarm |
||||
2010/01/11: V0.0.10: relaxed %PDF header checking |
||||
2010/04/28: V0.0.11: added /Launch |
||||
2010/09/21: V0.0.12: fixed cntCharsAfterLastEOF bug; fix by Russell Holloway |
||||
2011/12/29: updated for Python 3, added keyword /EmbeddedFile |
||||
2012/03/03: added PDFiD2JSON; coded by Brandon Dixon |
||||
2013/02/10: V0.1.0: added http/https support; added support for ZIP file with password 'infected' |
||||
2013/03/11: V0.1.1: fixes for Python 3 |
||||
2013/03/13: V0.1.2: Added error handling for files; added /XFA |
||||
2013/11/01: V0.2.0: Added @file & plugins |
||||
2013/11/02: continue |
||||
2013/11/04: added options -c, -m, -v |
||||
2013/11/06: added option -S |
||||
2013/11/08: continue |
||||
2013/11/09: added option -o |
||||
2013/11/15: refactoring |
||||
2014/09/30: added CSV header |
||||
2014/10/16: V0.2.1: added output when plugin & file not pdf |
||||
2014/10/18: some fixes for Python 3 |
||||
|
||||
Todo: |
||||
- update XML example (entropy, EOF) |
||||
- code review, cleanup |
||||
""" |
||||
|
||||
import optparse |
||||
import os |
||||
import re |
||||
import xml.dom.minidom |
||||
import traceback |
||||
import math |
||||
import operator |
||||
import os.path |
||||
import sys |
||||
import json |
||||
import zipfile |
||||
import collections |
||||
import glob |
||||
try: |
||||
import urllib2 |
||||
urllib23 = urllib2 |
||||
except: |
||||
import urllib.request |
||||
urllib23 = urllib.request |
||||
|
||||
#Convert 2 Bytes If Python 3 |
||||
def C2BIP3(string):
    """Convert *string* to ``bytes`` on Python 3; return it unchanged on Python 2.

    Each character is mapped to the byte with the same ordinal value.
    """
    if sys.version_info[0] <= 2:
        return string
    return bytes(ord(character) for character in string)
||||
|
||||
class cBinaryFile:
    """Byte-oriented reader with push-back.

    The input is chosen from *file*: an empty string selects ``sys.stdin``,
    an ``http://`` / ``https://`` prefix opens the URL, a name ending in
    ``.zip`` opens the first archive member with the password ``infected``,
    and anything else is opened as a binary file. If the input cannot be
    opened the error is printed and the process exits.

    Pushed-back bytes are kept in ``self.ungetted`` as a stack: the byte
    that will be read next is the last element.
    """

    def __init__(self, file):
        self.file = file
        self.ungetted = []
        lowered = file.lower()
        if file == '':
            self.infile = sys.stdin
        elif lowered.startswith('http://') or lowered.startswith('https://'):
            try:
                # the timeout keyword is only passed on Python >= 2.6.1
                if sys.hexversion >= 0x020601F0:
                    self.infile = urllib23.urlopen(file, timeout=5)
                else:
                    self.infile = urllib23.urlopen(file)
            except urllib23.HTTPError:
                print('Error accessing URL %s' % file)
                print(sys.exc_info()[1])
                sys.exit()
        elif lowered.endswith('.zip'):
            try:
                self.zipfile = zipfile.ZipFile(file, 'r')
                self.infile = self.zipfile.open(self.zipfile.infolist()[0], 'r', C2BIP3('infected'))
            except Exception:
                print('Error opening file %s' % file)
                print(sys.exc_info()[1])
                sys.exit()
        else:
            try:
                self.infile = open(file, 'rb')
            except Exception:
                print('Error opening file %s' % file)
                print(sys.exc_info()[1])
                sys.exit()

    def byte(self):
        """Return the next byte as an int, or None at end of input.

        The underlying stream is closed when its end is reached.
        """
        if self.ungetted:
            return self.ungetted.pop()
        inbyte = self.infile.read(1)
        if not inbyte:
            self.infile.close()
            return None
        return ord(inbyte)

    def bytes(self, size):
        """Return up to *size* upcoming bytes as a list of ints, in stream order.

        Pushed-back bytes are delivered first, then the stream is read for
        the remainder. The stream is not closed here, so that a later
        byte() call can still detect the end of input itself.
        """
        pending = len(self.ungetted)
        if size <= pending:
            # The stack holds the next byte last: take the tail, reversed.
            result = self.ungetted[pending - size:][::-1]
            del self.ungetted[pending - size:]
            return result
        inbytes = self.infile.read(size - pending)
        if isinstance(inbytes, str):
            fresh = [ord(b) for b in inbytes]
        else:
            fresh = [b for b in inbytes]
        result = self.ungetted[::-1] + fresh
        self.ungetted = []
        return result

    def unget(self, byte):
        """Push one byte back; it is returned by the next read."""
        self.ungetted.append(byte)

    def ungets(self, bytes):
        """Push a sequence of bytes back so they are read again in order.

        The sequence passed in is left unmodified.
        """
        self.ungetted.extend(reversed(bytes))
||||
|
||||
class cPDFDate:
    """Character-at-a-time recogniser for date strings starting with ``D:``.

    A date is ``D:`` followed by 14 digits and, optionally, a ``+``, ``-``
    or ``Z`` followed by two digits, an apostrophe and two more digits.
    """

    def __init__(self):
        self.state = 0

    def parse(self, char):
        """Consume one character.

        Returns the recognised date string (also stored in ``self.date``)
        when *char* completes a date, otherwise None.
        """
        # A 'D' always (re)starts recognition, whatever the current state.
        if char == 'D':
            self.state = 1
            return None
        if self.state == 1:
            return self._expectColon(char)
        if self.state == 2:
            return self._collectTimestamp(char)
        if self.state == 3:
            return self._collectOffset(char)
        return None

    def _expectColon(self, char):
        # State 1: only ':' may follow the 'D'.
        if char == ':':
            self.state = 2
            self.digits1 = ''
        else:
            self.state = 0
        return None

    def _collectTimestamp(self, char):
        # State 2: gather the 14 digits, then decide how the date ends.
        isDigit = '0' <= char <= '9'
        if len(self.digits1) < 14:
            if isDigit:
                self.digits1 += char
            else:
                self.state = 0
            return None
        if char in ('+', '-', 'Z'):
            self.state = 3
            self.digits2 = ''
            self.TZ = char
            return None
        self.state = 0
        if isDigit:
            # a 15th digit means this was not a date
            return None
        # any other character ends a date that has no offset part
        self.date = 'D:' + self.digits1
        return self.date

    def _collectOffset(self, char):
        # State 3: two digits, an apostrophe, two digits.
        seen = len(self.digits2)
        if seen == 2:
            accepted = char == "'"
        elif seen < 5:
            accepted = '0' <= char <= '9'
        else:
            return None
        if not accepted:
            self.state = 0
            return None
        self.digits2 += char
        if len(self.digits2) == 5:
            self.state = 0
            self.date = 'D:' + self.digits1 + self.TZ + self.digits2
            return self.date
        return None
||||
|
||||
def fEntropy(countByte, countTotal):
    """Return one byte value's term of a base-2 entropy sum.

    *countByte* is how often the value occurred and *countTotal* the number
    of bytes counted overall. The term is 0.0 when the value never occurred
    or when nothing was counted at all.
    """
    if countTotal == 0:
        # An empty bucket set would otherwise raise ZeroDivisionError.
        return 0.0
    x = float(countByte) / countTotal
    if x > 0:
        return - x * math.log(x, 2)
    return 0.0


class cEntropy:
    """Per-byte-value counters for all bytes and for bytes inside streams."""

    def __init__(self):
        self.allBucket = [0] * 256
        self.streamBucket = [0] * 256

    def add(self, byte, insideStream):
        """Count *byte* (an int index), also as stream data if *insideStream*."""
        self.allBucket[byte] += 1
        if insideStream:
            self.streamBucket[byte] += 1

    def removeInsideStream(self, byte):
        """Take one occurrence of *byte* back out of the stream counters."""
        if self.streamBucket[byte] > 0:
            self.streamBucket[byte] -= 1

    def calc(self):
        """Return (count, entropy) pairs, flattened, for all / stream / non-stream bytes.

        The result is the 6-tuple ``(allCount, allEntropy, streamCount,
        streamEntropy, nonStreamCount, nonStreamEntropy)``.
        """
        # A real list: it is traversed twice below, which a Python 3 map
        # iterator would not survive.
        self.nonStreamBucket = [total - inside for total, inside in zip(self.allBucket, self.streamBucket)]
        allCount = sum(self.allBucket)
        streamCount = sum(self.streamBucket)
        nonStreamCount = sum(self.nonStreamBucket)
        return (
            allCount,
            sum(fEntropy(count, allCount) for count in self.allBucket),
            streamCount,
            sum(fEntropy(count, streamCount) for count in self.streamBucket),
            nonStreamCount,
            sum(fEntropy(count, nonStreamCount) for count in self.nonStreamBucket),
        )
||||
|
||||
class cPDFEOF:
    """Counts ``%%EOF`` markers and the characters seen after the last one.

    ``cntEOFs`` is the number of markers found; ``cntCharsAfterLastEOF`` is
    the number of characters parsed since the last marker (0 before any
    marker was found).
    """

    # For each partial marker, the character that extends it.
    _NEXT_CHAR = {'': '%', '%': '%', '%%': 'E', '%%E': 'O', '%%EO': 'F'}

    def __init__(self):
        self.token = ''
        self.cntEOFs = 0
        # Defined from the start so the attribute can be read before the
        # first marker has been seen.
        self.cntCharsAfterLastEOF = 0

    def parse(self, char):
        """Consume one character and update the counters."""
        if self.cntEOFs > 0:
            self.cntCharsAfterLastEOF += 1
        token = self.token
        if token in self._NEXT_CHAR and self._NEXT_CHAR[token] == char:
            self.token = token + char
        elif token == '%%EOF' and char in ('\n', '\r', ' ', '\t'):
            # A marker only counts once whitespace follows it.
            self.cntEOFs += 1
            self.cntCharsAfterLastEOF = 0
            if char == '\n':
                self.token = ''
            else:
                self.token = token + char
        elif token == '%%EOF\r':
            # The line feed of a CR LF pair still belongs to the marker.
            if char == '\n':
                self.cntCharsAfterLastEOF = 0
            self.token = ''
        else:
            self.token = ''
||||
|
||||
def FindPDFHeaderRelaxed(oBinaryFile):
    """Look for a ``%PDF`` header in the first 1024 bytes of *oBinaryFile*.

    Returns ``(bytesHeader, header)``: the list of byte values up to the end
    of the header line and the header text itself. Everything after the
    header is pushed back onto *oBinaryFile*. When no ``%PDF`` is found, all
    bytes are pushed back and ``([], None)`` is returned.

    The header ends at the first line feed or carriage return within the 10
    bytes following ``%PDF``. Without one it ends 9 bytes after ``%PDF``, or
    at the end of the data when fewer bytes are available.
    """
    data = oBinaryFile.bytes(1024)
    index = ''.join([chr(value) for value in data]).find('%PDF')
    if index == -1:
        oBinaryFile.ungets(data)
        return ([], None)
    searchStart = index + 4
    searchStop = searchStart + 10
    # Value used when no line ending is found; capped so that short input
    # cannot be indexed past its end.
    endHeader = min(searchStop - 1, len(data))
    for position in range(searchStart, min(searchStop, len(data))):
        if data[position] == 10 or data[position] == 13:
            endHeader = position
            break
    oBinaryFile.ungets(data[endHeader:])
    return (data[0:endHeader], ''.join([chr(value) for value in data[index:endHeader]]))
||||
|
||||
def Hexcode2String(char):
    """Render an int as ``#xx`` (two lowercase hex digits); pass anything else through."""
    return '#%02x' % char if type(char) is int else char
||||
|
||||
def SwapCase(char):
    """Swap the letter case of *char*.

    An int is treated as a character code and an int is returned; any other
    value is returned via its own ``swapcase()``.
    """
    if type(char) is not int:
        return char.swapcase()
    swapped = chr(char).swapcase()
    return ord(swapped)
||||
|
||||
def HexcodeName2String(hexcodeName):
    """Join the elements of *hexcodeName* into one string, each passed through Hexcode2String."""
    pieces = map(Hexcode2String, hexcodeName)
    return ''.join(pieces)
||||
|
||||
def SwapName(wordExact):
    """Apply SwapCase to every element of *wordExact* and return the ``map`` result."""
    swapped = map(SwapCase, wordExact)
    return swapped
||||
|
||||
def UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut):
    """Account for one completed word and reset the word state.

    *words* maps a keyword to ``[count, hexcodeCount]`` and is updated in
    place; with *allNames* set, unknown names starting with '/' are added to
    it. When *fOut* is given, the word is written to it, with the case of
    selected names swapped. When *oEntropy* is given, the bytes of a closing
    ``endstream`` are removed from its stream counters.

    Returns ``('', [], False, lastName, insideStream)``: the cleared word,
    exact word and hexcode flag plus the updated last name and stream flag.
    """
    if word == '':
        return ('', [], False, lastName, insideStream)

    name = slash + word
    if name in words:
        counters = words[name]
        counters[0] += 1
        if hexcode:
            counters[1] += 1
    elif slash == '/' and allNames:
        words[name] = [1, 1] if hexcode else [1, 0]

    if slash == '/':
        lastName = name
    elif slash == '':
        if word == 'stream':
            insideStream = True
        elif word == 'endstream':
            if insideStream == True and oEntropy is not None:
                for letter in 'endstream':
                    oEntropy.removeInsideStream(ord(letter))
            insideStream = False

    if fOut is not None:
        swapTargets = ('/JS', '/JavaScript', '/AA', '/OpenAction', '/JBIG2Decode', '/RichMedia', '/Launch')
        if slash == '/' and '/' + word in swapTargets:
            wordExactSwapped = HexcodeName2String(SwapName(wordExact))
            fOut.write(C2BIP3(wordExactSwapped))
            print('/%s -> /%s' % (HexcodeName2String(wordExact), wordExactSwapped))
        else:
            fOut.write(C2BIP3(HexcodeName2String(wordExact)))

    return ('', [], False, lastName, insideStream)
||||
|
||||
class cCVE_2009_3459:
    """Counts ``/Colors`` values that are too large to fit in 3 bytes."""

    def __init__(self):
        self.count = 0

    def Check(self, lastName, word):
        """Increase ``count`` when *word* is a number above 2**24 following ``/Colors``."""
        # decided to alert when the number of colors is expressed with more
        # than 3 bytes; ** is exponentiation (^ would be bitwise XOR)
        if lastName == '/Colors' and word.isdigit() and int(word) > 2 ** 24:
            self.count += 1
||||
|
||||
def XMLAddAttribute(xmlDoc, name, value=None):
    """Create attribute *name* on the document element of *xmlDoc*.

    The attribute's value is set when *value* is given. The attribute node
    is returned so that callers can update its value later.
    """
    att = xmlDoc.createAttribute(name)
    xmlDoc.documentElement.setAttributeNode(att)
    if value is not None:
        att.nodeValue = value
    return att
||||
|
||||
def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False): |
||||
"""Example of XML output: |
||||
<PDFiD ErrorOccured="False" ErrorMessage="" Filename="test.pdf" Header="%PDF-1.1" IsPDF="True" Version="0.0.4" Entropy="4.28"> |
||||
<Keywords> |
||||
<Keyword Count="7" HexcodeCount="0" Name="obj"/> |
||||
<Keyword Count="7" HexcodeCount="0" Name="endobj"/> |
||||
<Keyword Count="1" HexcodeCount="0" Name="stream"/> |
||||
<Keyword Count="1" HexcodeCount="0" Name="endstream"/> |
||||
<Keyword Count="1" HexcodeCount="0" Name="xref"/> |
||||
<Keyword Count="1" HexcodeCount="0" Name="trailer"/> |
||||
<Keyword Count="1" HexcodeCount="0" Name="startxref"/> |
||||
<Keyword Count="1" HexcodeCount="0" Name="/Page"/> |
||||
<Keyword Count="0" HexcodeCount="0" Name="/Encrypt"/> |
||||
<Keyword Count="1" HexcodeCount="0" Name="/JS"/> |
||||
<Keyword Count="1" HexcodeCount="0" Name="/JavaScript"/> |
||||
<Keyword Count="0" HexcodeCount="0" Name="/AA"/> |
||||
<Keyword Count="1" HexcodeCount="0" Name="/OpenAction"/> |
||||
<Keyword Count="0" HexcodeCount="0" Name="/JBIG2Decode"/> |
||||
</Keywords> |
||||
<Dates> |
||||
<Date Value="D:20090128132916+01'00" Name="/ModDate"/> |
||||
</Dates> |
||||
</PDFiD> |
||||
""" |
||||
|
||||
word = '' |
||||
wordExact = [] |
||||
hexcode = False |
||||
lastName = '' |
||||
insideStream = False |
||||
keywords = ('obj', |
||||
'endobj', |
||||
'stream', |
||||
'endstream', |
||||
'xref', |
||||
'trailer', |
||||
'startxref', |
||||
'/Page', |
||||
'/Encrypt', |
||||
'/ObjStm', |
||||
'/JS', |
||||
'/JavaScript', |
||||
'/AA', |
||||
'/OpenAction', |
||||
'/AcroForm', |
||||
'/JBIG2Decode', |
||||
'/RichMedia', |
||||
'/Launch', |
||||
'/EmbeddedFile', |
||||
'/XFA', |
||||
) |
||||
words = {} |
||||
dates = [] |
||||
for keyword in keywords: |
||||
words[keyword] = [0, 0] |
||||
slash = '' |
||||
xmlDoc = xml.dom.minidom.getDOMImplementation().createDocument(None, 'PDFiD', None) |
||||
XMLAddAttribute(xmlDoc, 'Version', __version__) |
||||
XMLAddAttribute(xmlDoc, 'Filename', file) |
||||
attErrorOccured = XMLAddAttribute(xmlDoc, 'ErrorOccured', 'False') |
||||
attErrorMessage = XMLAddAttribute(xmlDoc, 'ErrorMessage', '') |
||||
|
||||
oPDFDate = None |
||||
oEntropy = None |
||||
oPDFEOF = None |
||||
oCVE_2009_3459 = cCVE_2009_3459() |
||||
try: |
||||
attIsPDF = xmlDoc.createAttribute('IsPDF') |
||||
xmlDoc.documentElement.setAttributeNode(attIsPDF) |
||||
oBinaryFile = cBinaryFile(file) |
||||
if extraData: |
||||
oPDFDate = cPDFDate() |
||||
oEntropy = cEntropy() |
||||
oPDFEOF = cPDFEOF() |
||||
(bytesHeader, pdfHeader) = FindPDFHeaderRelaxed(oBinaryFile) |
||||
if disarm: |
||||
(pathfile, extension) = os.path.splitext(file) |
||||
fOut = open(pathfile + '.disarmed' + extension, 'wb') |
||||
for byteHeader in bytesHeader: |
||||
fOut.write(C2BIP3(chr(byteHeader))) |
||||
else: |
||||
fOut = None |
||||
if oEntropy != None: |
||||
for byteHeader in bytesHeader: |
||||
oEntropy.add(byteHeader, insideStream) |
||||
if pdfHeader == None and not force: |
||||
attIsPDF.nodeValue = 'False' |
||||
return xmlDoc |
||||
else: |
||||
if pdfHeader == None: |
||||
attIsPDF.nodeValue = 'False' |
||||
pdfHeader = '' |
||||
else: |
||||
attIsPDF.nodeValue = 'True' |
||||
att = xmlDoc.createAttribute('Header') |
||||
att.nodeValue = repr(pdfHeader[0:10]).strip("'") |
||||
xmlDoc.documentElement.setAttributeNode(att) |
||||
byte = oBinaryFile.byte() |
||||
while byte != None: |
||||
char = chr(byte) |
||||
charUpper = char.upper() |
||||
if charUpper >= 'A' and charUpper <= 'Z' or charUpper >= '0' and charUpper <= '9': |
||||
word += char |
||||
wordExact.append(char) |
||||
elif slash == '/' and char == '#': |
||||
d1 = oBinaryFile.byte() |
||||
if d1 != None: |
||||
d2 = oBinaryFile.byte() |
||||
if d2 != None and (chr(d1) >= '0' and chr(d1) <= '9' or chr(d1).upper() >= 'A' and chr(d1).upper() <= 'F') and (chr(d2) >= '0' and chr(d2) <= '9' or chr(d2).upper() >= 'A' and chr(d2).upper() <= 'F'): |
||||
word += chr(int(chr(d1) + chr(d2), 16)) |
||||
wordExact.append(int(chr(d1) + chr(d2), 16)) |
||||
hexcode = True |
||||
if oEntropy != None: |
||||
oEntropy.add(d1, insideStream) |
||||
oEntropy.add(d2, insideStream) |
||||
if oPDFEOF != None: |
||||
oPDFEOF.parse(d1) |
||||
oPDFEOF.parse(d2) |
||||
else: |
||||
oBinaryFile.unget(d2) |
||||
oBinaryFile.unget(d1) |
||||
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut) |
||||
if disarm: |
||||
fOut.write(C2BIP3(char)) |
||||
else: |
||||
oBinaryFile.unget(d1) |
||||
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut) |
||||
if disarm: |
||||
fOut.write(C2BIP3(char)) |
||||
else: |
||||
oCVE_2009_3459.Check(lastName, word) |
||||
|
||||
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut) |
||||
if char == '/': |
||||
slash = '/' |
||||
else: |
||||
slash = '' |
||||
if disarm: |
||||
fOut.write(C2BIP3(char)) |
||||
|
||||
if oPDFDate != None and oPDFDate.parse(char) != None: |
||||
dates.append([oPDFDate.date, lastName]) |
||||
|
||||
if oEntropy != None: |
||||
oEntropy.add(byte, insideStream) |
||||
|
||||
if oPDFEOF != None: |
||||
oPDFEOF.parse(char) |
||||
|
||||
byte = oBinaryFile.byte() |
||||
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut) |
||||
|
||||
# check to see if file ended with %%EOF. If so, we can reset charsAfterLastEOF and add one to EOF count. This is never performed in |
||||
# the parse function because it never gets called due to hitting the end of file. |
||||
if byte == None and oPDFEOF != None: |
||||
if oPDFEOF.token == '%%EOF': |
||||
oPDFEOF.cntEOFs += 1 |
||||
oPDFEOF.cntCharsAfterLastEOF = 0 |
||||
oPDFEOF.token = '' |
||||
|
||||
except SystemExit: |
||||
sys.exit() |
||||
except: |
||||
attErrorOccured.nodeValue = 'True' |
||||
attErrorMessage.nodeValue = traceback.format_exc() |
||||
|
||||
if disarm: |
||||
fOut.close() |
||||
|
||||
attEntropyAll = xmlDoc.createAttribute('TotalEntropy') |
||||
xmlDoc.documentElement.setAttributeNode(attEntropyAll) |
||||
attCountAll = xmlDoc.createAttribute('TotalCount') |
||||
xmlDoc.documentElement.setAttributeNode(attCountAll) |
||||
attEntropyStream = xmlDoc.createAttribute('StreamEntropy') |
||||
xmlDoc.documentElement.setAttributeNode(attEntropyStream) |
||||
attCountStream = xmlDoc.createAttribute('StreamCount') |
||||
xmlDoc.documentElement.setAttributeNode(attCountStream) |
||||
attEntropyNonStream = xmlDoc.createAttribute('NonStreamEntropy') |
||||
xmlDoc.documentElement.setAttributeNode(attEntropyNonStream) |
||||
attCountNonStream = xmlDoc.createAttribute('NonStreamCount') |
||||
xmlDoc.documentElement.setAttributeNode(attCountNonStream) |
||||
if oEntropy != None: |
||||
(countAll, entropyAll , countStream, entropyStream, countNonStream, entropyNonStream) = oEntropy.calc() |
||||
attEntropyAll.nodeValue = '%f' % entropyAll |
||||
attCountAll.nodeValue = '%d' % countAll |
||||
attEntropyStream.nodeValue = '%f' % entropyStream |
||||
attCountStream.nodeValue = '%d' % countStream |
||||
attEntropyNonStream.nodeValue = '%f' % entropyNonStream |
||||
attCountNonStream.nodeValue = '%d' % countNonStream |
||||
else: |
||||
attEntropyAll.nodeValue = '' |
||||
attCountAll.nodeValue = '' |
||||
attEntropyStream.nodeValue = '' |
||||
attCountStream.nodeValue = '' |
||||
attEntropyNonStream.nodeValue = '' |
||||
attCountNonStream.nodeValue = '' |
||||
attCountEOF = xmlDoc.createAttribute('CountEOF') |
||||
xmlDoc.documentElement.setAttributeNode(attCountEOF) |
||||
attCountCharsAfterLastEOF = xmlDoc.createAttribute('CountCharsAfterLastEOF') |
||||
xmlDoc.documentElement.setAttributeNode(attCountCharsAfterLastEOF) |
||||
if oPDFEOF != None: |
||||
attCountEOF.nodeValue = '%d' % oPDFEOF.cntEOFs |
||||
attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF |
||||
else: |
||||
attCountEOF.nodeValue = '' |
||||
attCountCharsAfterLastEOF.nodeValue = '' |
||||
|
||||
eleKeywords = xmlDoc.createElement('Keywords') |
||||
xmlDoc.documentElement.appendChild(eleKeywords) |
||||
for keyword in keywords: |
||||
eleKeyword = xmlDoc.createElement('Keyword') |
||||
eleKeywords.appendChild(eleKeyword) |
||||
att = xmlDoc.createAttribute('Name') |
||||
att.nodeValue = keyword |
||||
eleKeyword.setAttributeNode(att) |
||||
att = xmlDoc.createAttribute('Count') |
||||
att.nodeValue = str(words[keyword][0]) |
||||
eleKeyword.setAttributeNode(att) |
||||
att = xmlDoc.createAttribute('HexcodeCount') |
||||
att.nodeValue = str(words[keyword][1]) |
||||
eleKeyword.setAttributeNode(att) |
||||
eleKeyword = xmlDoc.createElement('Keyword') |
||||
eleKeywords.appendChild(eleKeyword) |
||||
att = xmlDoc.createAttribute('Name') |
||||
att.nodeValue = '/Colors > 2^24' |
||||
eleKeyword.setAttributeNode(att) |
||||
att = xmlDoc.createAttribute('Count') |
||||
att.nodeValue = str(oCVE_2009_3459.count) |
||||
eleKeyword.setAttributeNode(att) |
||||
att = xmlDoc.createAttribute('HexcodeCount') |
||||
att.nodeValue = str(0) |
||||
eleKeyword.setAttributeNode(att) |
||||
if allNames: |
||||
keys = sorted(words.keys()) |
||||
for word in keys: |
||||
if not word in keywords: |
||||
eleKeyword = xmlDoc.createElement('Keyword') |
||||
eleKeywords.appendChild(eleKeyword) |
||||
att = xmlDoc.createAttribute('Name') |
||||
att.nodeValue = word |
||||
eleKeyword.setAttributeNode(att) |
||||
att = xmlDoc.createAttribute('Count') |
||||
att.nodeValue = str(words[word][0]) |
||||
eleKeyword.setAttributeNode(att) |
||||
att = xmlDoc.createAttribute('HexcodeCount') |
||||
att.nodeValue = str(words[word][1]) |
||||
eleKeyword.setAttributeNode(att) |
||||
eleDates = xmlDoc.createElement('Dates') |
||||
xmlDoc.documentElement.appendChild(eleDates) |
||||
dates.sort(key=lambda x: x[0]) |
||||
for date in dates: |
||||
eleDate = xmlDoc.createElement('Date') |
||||
eleDates.appendChild(eleDate) |
||||
att = xmlDoc.createAttribute('Value') |
||||
att.nodeValue = date[0] |
||||
eleDate.setAttributeNode(att) |
||||
att = xmlDoc.createAttribute('Name') |
||||
att.nodeValue = date[1] |
||||
eleDate.setAttributeNode(att) |
||||
return xmlDoc |
||||
|
||||
def PDFiD2String(xmlDoc, force):
    """Render a PDFiD XML document as the plain-text report.

    xmlDoc -- DOM document whose root element carries the report attributes
              ('Version', 'Filename', 'ErrorOccured', 'IsPDF', 'Header', ...)
              and contains a 'Keywords' and a 'Dates' child element.
    force  -- when true, the keyword report is produced even if the 'IsPDF'
              attribute is 'False'.

    Returns the report as a single string; every line ends with a newline.
    """
    root = xmlDoc.documentElement
    attribute = root.getAttribute
    report = ['PDFiD %s %s\n' % (attribute('Version'), attribute('Filename'))]

    # An analysis error short-circuits everything else: only the message is shown.
    if attribute('ErrorOccured') == 'True':
        report.append('***Error occured***\n%s\n' % attribute('ErrorMessage'))
        return ''.join(report)

    if attribute('IsPDF') == 'False' and not force:
        report.append(' Not a PDF document\n')
        return ''.join(report)

    report.append(' PDF Header: %s\n' % attribute('Header'))

    # One line per keyword; the hexcode count is appended only when non-zero.
    for keywordNode in root.getElementsByTagName('Keywords')[0].childNodes:
        line = ' %-16s %7d' % (keywordNode.getAttribute('Name'), int(keywordNode.getAttribute('Count')))
        hexcodeCount = int(keywordNode.getAttribute('HexcodeCount'))
        if hexcodeCount > 0:
            line += '(%d)' % hexcodeCount
        report.append(line + '\n')

    # The following attributes are empty strings when the value is not
    # available; in that case the corresponding line is left out.
    for label, name in (('%%EOF', 'CountEOF'), ('After last %%EOF', 'CountCharsAfterLastEOF')):
        if attribute(name) != '':
            report.append(' %-16s %7d\n' % (label, int(attribute(name))))

    for dateNode in root.getElementsByTagName('Dates')[0].childNodes:
        report.append(' %-23s %s\n' % (dateNode.getAttribute('Value'), dateNode.getAttribute('Name')))

    entropyLines = (
        (' Total entropy: %s (%10s bytes)\n', 'TotalEntropy', 'TotalCount'),
        (' Entropy inside streams: %s (%10s bytes)\n', 'StreamEntropy', 'StreamCount'),
        (' Entropy outside streams: %s (%10s bytes)\n', 'NonStreamEntropy', 'NonStreamCount'),
    )
    for template, entropyName, countName in entropyLines:
        if attribute(entropyName) != '':
            report.append(template % (attribute(entropyName), attribute(countName)))

    return ''.join(report)
||||
|
||||
class cCount:
    """Pair of counters kept for a single keyword.

    cPDFiD builds one instance per 'Keyword' element, passing the integer
    values of its 'Count' and 'HexcodeCount' attributes.
    """

    def __init__(self, count, hexcode):
        self.count, self.hexcode = count, hexcode
||||
|
||||
class cPDFiD:
    """Attribute-style view of a PDFiD XML document.

    Always sets: version, filename, errorOccured, errorMessage, isPDF.
    When no error occurred and the document is a PDF (or force is true) it
    additionally sets header, keywords (name -> cCount) and one attribute per
    standard keyword (obj, endobj, ..., xfa, colors_gt_2_24).
    """

    def __init__(self, xmlDoc, force):
        root = xmlDoc.documentElement
        self.version = root.getAttribute('Version')
        self.filename = root.getAttribute('Filename')
        self.errorOccured = root.getAttribute('ErrorOccured') == 'True'
        self.errorMessage = root.getAttribute('ErrorMessage')
        # isPDF stays None when the analysis failed: nothing is known then.
        self.isPDF = None
        if self.errorOccured:
            return

        self.isPDF = root.getAttribute('IsPDF') == 'True'
        if not (force or self.isPDF):
            return

        self.header = root.getAttribute('Header')
        self.keywords = {}
        for keywordNode in root.getElementsByTagName('Keywords')[0].childNodes:
            counts = cCount(int(keywordNode.getAttribute('Count')), int(keywordNode.getAttribute('HexcodeCount')))
            self.keywords[keywordNode.getAttribute('Name')] = counts

        # (attribute name, keyword name), bound in this order; a keyword
        # missing from the document raises KeyError.
        shortcuts = (
            ('obj', 'obj'),
            ('endobj', 'endobj'),
            ('stream', 'stream'),
            ('endstream', 'endstream'),
            ('xref', 'xref'),
            ('trailer', 'trailer'),
            ('startxref', 'startxref'),
            ('page', '/Page'),
            ('encrypt', '/Encrypt'),
            ('objstm', '/ObjStm'),
            ('js', '/JS'),
            ('javascript', '/JavaScript'),
            ('aa', '/AA'),
            ('openaction', '/OpenAction'),
            ('acroform', '/AcroForm'),
            ('jbig2decode', '/JBIG2Decode'),
            ('richmedia', '/RichMedia'),
            ('launch', '/Launch'),
            ('embeddedfile', '/EmbeddedFile'),
            ('xfa', '/XFA'),
            ('colors_gt_2_24', '/Colors > 2^24'),
        )
        for attributeName, keywordName in shortcuts:
            setattr(self, attributeName, self.keywords[keywordName])
||||
|
||||
def Print(lines, options):
    """Print *lines* to stdout and, if logging is enabled, append it to a log file.

    lines   -- text to output; a newline is added when writing to the log file.
    options -- object providing:
               scan   -- when true, log to 'PDFiD.log' in the current directory;
               output -- when not '', log to this file instead (takes
                         precedence over the scan default).

    The log file is opened in append mode for every call.
    """
    print(lines)
    filename = None
    if options.scan:
        filename = 'PDFiD.log'
    if options.output != '':
        filename = options.output
    if filename:
        # The with-block closes the file even when the write fails.
        with open(filename, 'a') as logfile:
            logfile.write(lines + '\n')
||||
|
||||
def Quote(value, separator, quote):
    """Wrap *value* in *quote* characters when it is a str containing *separator*.

    Any other value (non-string, or string without the separator) is returned
    unchanged. Quote characters already present in the value are not escaped.
    """
    needsQuoting = isinstance(value, str) and separator in value
    return quote + value + quote if needsQuoting else value
||||
|
||||
def MakeCSVLine(fields, separator=';', quote='"'):
    """Build one CSV line from (format, value) pairs.

    fields    -- sequence of pairs: item 0 is a %-format specifier such as '%s'
                 or '%.02f', item 1 is the value to format with it.
    separator -- string placed between the formatted fields.
    quote     -- string put around string values that contain the separator
                 (see Quote).

    Returns the formatted line without a trailing newline.
    """
    template = separator.join(field[0] for field in fields)
    values = tuple(Quote(field[1], separator, quote) for field in fields)
    return template % values
||||
|
||||
def ProcessFile(filename, options, plugins):
    """Analyse one file with PDFiD and report the result through Print.

    filename -- path of the file to analyse.
    options  -- object providing all, extra, disarm, force (passed to PDFiD),
                select, verbose, csv and minimumscore (plus whatever Print
                reads from it).
    plugins  -- list of plugin classes. Each class exposes 'name' and
                'onlyValidPDF', is instantiated with the cPDFiD object and
                provides a Score() method.

    Three modes:
    * no plugins and no select expression: print the plain report;
    * select expression: print the file (name only in CSV mode) when the
      expression evaluates to a true value;
    * plugins: print score lines/reports for scores >= options.minimumscore.

    Errors raised by the select expression or by a plugin are reported and end
    processing of this file; they are re-raised only when options.verbose is set.
    """
    xmlDoc = PDFiD(filename, options.all, options.extra, options.disarm, options.force)
    if plugins == [] and options.select == '':
        Print(PDFiD2String(xmlDoc, options.force), options)
        return

    oPDFiD = cPDFiD(xmlDoc, options.force)
    if options.select:
        if options.force or (not oPDFiD.errorOccured and oPDFiD.isPDF):
            # 'pdf' is the name select expressions use to reach the analysis
            # result (e.g. pdf.js.count); the local variables of this function
            # are visible to the expression, so do not rename them.
            pdf = oPDFiD
            try:
                # SECURITY: options.select is executed as Python code. It must
                # only ever come from the operator running the tool, never
                # from the analysed file or another untrusted source.
                selected = eval(options.select)
            except Exception:
                Print('Error evaluating select expression: %s' % options.select, options)
                if options.verbose:
                    raise
                return
            if selected:
                if options.csv:
                    Print(filename, options)
                else:
                    Print(PDFiD2String(xmlDoc, options.force), options)
    else:
        for cPlugin in plugins:
            if not cPlugin.onlyValidPDF or (not oPDFiD.errorOccured and oPDFiD.isPDF):
                try:
                    oPlugin = cPlugin(oPDFiD)
                except Exception:
                    Print('Error instantiating plugin: %s' % cPlugin.name, options)
                    if options.verbose:
                        raise
                    return

                try:
                    score = oPlugin.Score()
                except Exception:
                    Print('Error running plugin: %s' % cPlugin.name, options)
                    if options.verbose:
                        raise
                    return

                if options.csv:
                    if score >= options.minimumscore:
                        Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%.02f', score))), options)
                else:
                    if score >= options.minimumscore:
                        Print(PDFiD2String(xmlDoc, options.force), options)
                        Print('%s score: %.02f' % (cPlugin.name, score), options)
            else:
                # The plugin only handles valid PDFs and this file is not one.
                if options.csv:
                    if oPDFiD.errorOccured:
                        Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Error occured'))), options)
                    elif not oPDFiD.isPDF:
                        # elif, not if: after an error isPDF is None, which
                        # would otherwise add a second, misleading
                        # 'Not a PDF document' row for the same file.
                        Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Not a PDF document'))), options)
                else:
                    Print(PDFiD2String(xmlDoc, options.force), options)
||||
|
||||
|
||||
def Scan(directory, options, plugins):
    """Process *directory* recursively, or as a single file if it is not a directory.

    Every non-directory path found is handed to ProcessFile together with
    *options* and *plugins*. Any Exception raised on the way is printed and
    otherwise ignored, so one failing entry does not abort the scan of its
    siblings.
    """
    try:
        if not os.path.isdir(directory):
            ProcessFile(directory, options, plugins)
            return
        for name in os.listdir(directory):
            Scan(os.path.join(directory, name), options, plugins)
    except Exception as error:
        print(error)
||||
|
||||
#function derived from: http://blog.9bplus.com/pdfidpy-output-to-json |
||||
def PDFiD2JSON(xmlDoc, force): |
||||
#Get Top Layer Data |
||||
errorOccured = xmlDoc.documentElement.getAttribute('ErrorOccured') |
||||
errorMessage = xmlDoc.documentElement.getAttribute('ErrorMessage') |
||||
filename = xmlDoc.documentElement.getAttribute('Filename') |
||||
header = xmlDoc.documentElement.getAttribute('Header') |
||||
isPdf = xmlDoc.documentElement.getAttribute('IsPDF') |
||||
version = xmlDoc.documentElement.getAttribute('Version') |
||||
entropy = xmlDoc.documentElement.getAttribute('Entropy') |
||||
|
||||
#extra data |
||||
countEof = xmlDoc.documentElement.getAttribute('CountEOF') |
||||
countChatAfterLastEof = xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF') |
||||
totalEntropy = xmlDoc.documentElement.getAttribute('TotalEntropy') |
||||
streamEntropy = xmlDoc.documentElement.getAttribute('StreamEntropy') |
||||