[NOT WORKING] many changes in order to make the scripts more robust

pull/10/head
Raphaël Vinot 2013-02-08 15:38:13 +01:00
parent 68e7756f7a
commit 880e1f3a28
3 changed files with 235 additions and 103 deletions

1
TODO
View File

@ -23,3 +23,4 @@ TODO
* scripts to generate a SD card automatically (win/mac/linux)
* move the scripts away from /opt/
* strip back LibreOffice to the minimum required packages; in particular, if possible, remove the libreoffice-java-common package
* Write the groomer log on the destination key

View File

@ -0,0 +1,166 @@
#!/bin/bash
# Abort on the first unhandled error and trace every command.
set -e
set -x
# groom da kitteh!

# Source device (whole disk). NOTE: SRC, DST and PARTITIONS are assigned
# again further down in this file.
SRC='/dev/sdb'
# Partition nodes of the source device, one per line; empty when none exist.
# Double quotes so ${SRC} is actually expanded, and "|| true" because a
# non-matching grep would otherwise abort the script under `set -e`.
PARTITIONS=$(ls "${SRC}"* 2> /dev/null | grep "${SRC}[1-9][0-6]*" || true)
# Destination partition.
DST='/dev/sdc1'
# Directory holding the helper files used below (sedKillSpace, pdfbox jar).
GH=/opt/groomer/
# Java executable used to run pdfbox.
JAVA=/usr/bin/java
#######################################
# Copy every PDF found under a source tree to the same relative location
# under a destination tree, then extract the text of each PDF found under
# the destination into "<pdf>-extracted.txt".
# Globals:   GH   (read) - directory holding sedKillSpace and the pdfbox jar
#            JAVA (read) - java executable
# Arguments: $1 - source root
#            $2 - destination root
# File names come from an untrusted device: they are only ever passed as
# quoted arguments and are never evaluated as shell code.
#######################################
pdfCopyDirty()
{
    local path rel_dir name pdf
    # copy all pdf's over to their relative same locations
    find "$1" -iname "*.pdf" -print0 | while IFS= read -r -d '' path; do
        # directory of the file relative to the source root, with the
        # sedKillSpace rewrite applied (sed script not shown in this file)
        rel_dir=$(dirname -- "$path")
        rel_dir=${rel_dir#"$1"}
        rel_dir=$(printf '%s\n' "$rel_dir" | sed -f "$GH/sedKillSpace")
        name=$(basename -- "$path" | sed -f "$GH/sedKillSpace")
        mkdir -p -- "${2}${rel_dir}"
        cp -fv -- "$path" "${2}${rel_dir}/${name}"
    done
    # extract all the txt we can from potentially evil pdf's
    find "$2" -iname "*.pdf" -print0 | while IFS= read -r -d '' pdf; do
        printf '%s extracting text to %s\n' "$pdf" "${pdf}-extracted.txt"
        # stdin from /dev/null so the tool cannot eat the file list; a PDF
        # that cannot be parsed is reported and skipped instead of aborting
        "$JAVA" -jar "$GH/pdfbox-app-1.7.1.jar" ExtractText \
            "$pdf" "${pdf}-extracted.txt" < /dev/null 2> /dev/null \
            || echo "text extraction failed for ${pdf}" >&2
    done
}
#######################################
# Extract the text of every PDF found under a source tree straight into the
# matching relative location under a destination tree; the PDF itself is
# not copied.
# Globals:   GH   (read) - directory holding sedKillSpace and the pdfbox jar
#            JAVA (read) - java executable
# Arguments: $1 - source root
#            $2 - destination root
# File names come from an untrusted device: they are only ever passed as
# quoted arguments and are never evaluated as shell code.
#######################################
pdfCopyClean()
{
    local path rel_dir name out
    # convert pdf's on the fly from src to relative dst location
    find "$1" -iname "*.pdf" -print0 | while IFS= read -r -d '' path; do
        rel_dir=$(dirname -- "$path")
        rel_dir=${rel_dir#"$1"}
        rel_dir=$(printf '%s\n' "$rel_dir" | sed -f "$GH/sedKillSpace")
        name=$(basename -- "$path" | sed -f "$GH/sedKillSpace")
        out="${2}${rel_dir}/${name}-extracted.txt"
        mkdir -p -- "${2}${rel_dir}"
        printf '%s extracting text to %s\n' "$path" "$out"
        # stdin from /dev/null so the tool cannot eat the file list; a PDF
        # that cannot be parsed is reported and skipped instead of aborting
        "$JAVA" -jar "$GH/pdfbox-app-1.7.1.jar" ExtractText "$path" "$out" \
            < /dev/null 2> /dev/null \
            || echo "text extraction failed for ${path}" >&2
    done
}
#######################################
# Copy files whose extension is on a fixed allow-list (images, video, audio,
# plain-text formats) from a source tree to the same relative location under
# a destination tree. Extension matching is case-insensitive.
# Globals:   GH (read) - directory holding the sedKillSpace sed script
# Arguments: $1 - source root
#            $2 - destination root
# File names come from an untrusted device: they are only ever passed as
# quoted arguments and are never evaluated as shell code.
#######################################
copySafeFiles()
{
    local safe_types="\
jpg jpeg gif png tif tga raw \
mp4 avi mov \
mp3 wav \
txt xml csv tsv \
"
    local ext path rel_dir name
    # word-splitting of the list is intentional: one extension per iteration
    for ext in $safe_types; do
        find "$1" -iname "*.$ext" -print0 | while IFS= read -r -d '' path; do
            rel_dir=$(dirname -- "$path")
            rel_dir=${rel_dir#"$1"}
            rel_dir=$(printf '%s\n' "$rel_dir" | sed -f "$GH/sedKillSpace")
            name=$(basename -- "$path" | sed -f "$GH/sedKillSpace")
            mkdir -p -- "${2}${rel_dir}"
            cp -fv -- "$path" "${2}${rel_dir}/${name}"
        done
    done
}
#######################################
# Run convertCopyFilesHelper once per document family (text documents,
# spreadsheets, presentations), each with its own output format and filter.
# Globals:   TYPES, OUT, FILTER (written) - the helper loops over the global
#            TYPES and reads OUT/FILTER; the extra arguments passed after
#            the three paths are not used by it.
# Arguments: $1 - source root
#            $2 - destination root
#            $3 - staging directory
#######################################
convertCopyFiles()
{
    # wordy documents
    TYPES="doc docx odt sxw rtf wpd htm html"
    FILTER=Text; OUT=txt
    convertCopyFilesHelper "$1" "$2" "$3" $TYPES $OUT $FILTER
    # spreadsheets ("xlsx" was misspelled "xslx", so those files were skipped)
    TYPES="xls xlsx ods sxc"
    FILTER=calc_pdf_Export; OUT=pdf
    convertCopyFilesHelper "$1" "$2" "$3" $TYPES $OUT $FILTER
    # presentation files
    TYPES="ppt pptx odp sxi"
    FILTER=impress_pdf_Export; OUT=pdf
    convertCopyFilesHelper "$1" "$2" "$3" $TYPES $OUT $FILTER
}
#######################################
# For every extension in TYPES: stage matching files from the source tree
# into the staging directory (names rewritten by sedKillSpace), then convert
# each staged file with soffice into the matching relative directory of the
# destination tree.
# Globals:   TYPES  (read) - space-separated extensions to process
#            OUT    (read) - output extension handed to --convert-to
#            FILTER (read) - soffice export filter name
#            GH     (read) - directory holding the sedKillSpace sed script
# Arguments: $1 - source root
#            $2 - destination root
#            $3 - staging directory
# File names come from an untrusted device: they are only ever passed as
# quoted arguments and are never evaluated as shell code.
#######################################
convertCopyFilesHelper()
{
    local ext path rel_dir name staged
    # word-splitting of TYPES is intentional: one extension per iteration
    for ext in $TYPES; do
        find "$1" -iname "*.$ext" -print0 | while IFS= read -r -d '' path; do
            rel_dir=$(dirname -- "$path")
            rel_dir=${rel_dir#"$1"}
            rel_dir=$(printf '%s\n' "$rel_dir" | sed -f "$GH/sedKillSpace")
            name=$(basename -- "$path" | sed -f "$GH/sedKillSpace")
            mkdir -p -- "${3}${rel_dir}"
            cp -fv -- "$path" "${3}${rel_dir}/${name}"
        done
        find "$3" -iname "*.$ext" -print0 | while IFS= read -r -d '' staged; do
            rel_dir=$(dirname -- "$staged")
            rel_dir=${rel_dir#"$3"}
            mkdir -p -- "${2}${rel_dir}"
            # stdin from /dev/null so soffice cannot eat the file list; a
            # failed conversion is reported and skipped instead of aborting.
            # The "-extraced" spelling is kept: it is part of the output name.
            soffice --headless --convert-to "${ext}-extraced.${OUT}:${FILTER}" \
                "$staged" --outdir "${2}${rel_dir}" < /dev/null \
                || echo "conversion failed for ${staged}" >&2
        done
    done
}
#######################################
# Stage every .zip found under the source tree into the staging directory,
# unpack each staged archive below ZIPTEMP (in "UNZIPPED_<name>/"), delete
# the staged archives, then run the PDF / safe-file / conversion steps on
# the unpacked content and empty TEMP and ZIPTEMP.
# Globals:   GH           (read) - directory holding the sedKillSpace script
#            ZIPTEMP      (read) - directory archives are unpacked into
#            TEMP         (read) - staging directory emptied at the end
#            COPYDIRTYPDF (read) - 1: pdfCopyDirty, otherwise pdfCopyClean
# Arguments: $1 - source root
#            $2 - destination root
#            $3 - staging directory
# File names come from an untrusted device: they are only ever passed as
# quoted arguments and are never evaluated as shell code.
#######################################
unpackZip()
{
    local path rel_dir name archive dest
    find "$1" -iname "*.zip" -print0 | while IFS= read -r -d '' path; do
        rel_dir=$(dirname -- "$path")
        rel_dir=${rel_dir#"$1"}
        rel_dir=$(printf '%s\n' "$rel_dir" | sed -f "$GH/sedKillSpace")
        name=$(basename -- "$path" | sed -f "$GH/sedKillSpace")
        mkdir -p -- "${3}${rel_dir}"
        cp -fv -- "$path" "${3}${rel_dir}/${name}"
    done
    find "$3" -iname "*.zip" -print0 | while IFS= read -r -d '' archive; do
        rel_dir=$(dirname -- "$archive")
        rel_dir=${rel_dir#"$3"}
        dest="${ZIPTEMP}${rel_dir}/UNZIPPED_$(basename -- "$archive")/"
        mkdir -p -- "$dest"
        # stdin from /dev/null so unzip can neither prompt nor eat the file
        # list; a broken archive is reported and skipped instead of aborting
        unzip "$archive" -d "$dest" < /dev/null 2> /dev/null \
            || echo "could not fully unpack ${archive}" >&2
    done
    find "$3" -iname "*.zip" -print0 | while IFS= read -r -d '' archive; do
        rm -rf -- "$archive"
    done
    if [ -d "${ZIPTEMP}" ]; then
        # use this function's own destination argument rather than relying
        # on a variable that happens to be set by the caller
        if [ "$COPYDIRTYPDF" -eq 1 ]; then
            pdfCopyDirty "$ZIPTEMP" "$2"
        else
            pdfCopyClean "$ZIPTEMP" "$2"
        fi
        copySafeFiles "$ZIPTEMP" "$2" "$3"
        convertCopyFiles "$ZIPTEMP" "$2" "$3"
        # ":?" aborts instead of expanding to "/*" if a variable is empty
        rm -rf -- "${TEMP:?}"/*
        rm -rf -- "${ZIPTEMP:?}"/*
    fi
}
# ---------------------------------------------------------------------------
# Main flow: mount the destination stick, then mount each source partition
# read-only in turn, record its file list and copy/convert its content into
# FROM_PARTITION_<n> on the destination; finally clean up and power off.
# This script runs under `set -e`, so every command that is allowed to fail
# is guarded explicitly ("|| true" or used as an `if` condition).
# ---------------------------------------------------------------------------

# Mount points of the source and destination devices.
SRC=/src
DST=/dst
if [ ! -d "$SRC" ]; then
    mkdir "$SRC"
fi
if [ ! -d "$DST" ]; then
    mkdir "$DST"
fi
# Work directories and the file-list log, all on the destination stick.
TEMP=${DST}/temp
ZIPTEMP=${DST}/ziptemp
FL=${DST}/filelist.txt

# Nothing may be mounted yet; that must not abort the script.
umount "$DST" 2> /dev/null || true
# Test mount directly: a separate "$?" check is never reached under `set -e`.
if ! mount /dev/sdb1 "$DST"; then
    echo 'Could not mount target USB stick!' >&2
    exit 1
fi
echo Target USB device mounted at $DST
rm -rf "$DST"/FROM_PARTITION_*
# mount temp and make sure it's empty
mkdir -p "$TEMP"
mkdir -p "$ZIPTEMP"
rm -rf "${TEMP:?}"/*
rm -rf "${ZIPTEMP:?}"/*
echo Full file list from source USB > "$FL"

# 1: copy PDFs and extract their text; anything else: extract text only.
COPYDIRTYPDF=0
PARTCOUNT=1
# Partition nodes of the source disk; empty (not fatal) when there are none.
PARTITIONS=$(ls /dev/sda* 2> /dev/null | grep '/dev/sda[1-9][0-6]*' || true)
for partition in $PARTITIONS
do
    echo Processing partition: ${PARTCOUNT} $partition
    umount "$SRC" 2> /dev/null || true
    # An unmountable partition is skipped; the remaining ones are still done.
    if ! mount -r "$partition" "$SRC"; then
        echo could not mount $partition at $SRC
    else
        echo $partition mounted at $SRC
        echo PARTITION $PARTCOUNT >> "$FL"
        # List every entry relative to the mount point. No eval: file names
        # on the source stick are untrusted and must not be run as shell code.
        find "$SRC" -mindepth 1 -printf '/%P\n' >> "$FL"

        # create a directory on the destination named FROM_PARTITION_n
        targetDir=${DST}/FROM_PARTITION_${PARTCOUNT}
        echo copying to: $targetDir
        mkdir -p "$targetDir"
        if [ "$COPYDIRTYPDF" -eq 1 ]; then
            pdfCopyDirty "$SRC" "$targetDir"
        else
            pdfCopyClean "$SRC" "$targetDir"
        fi

        # copy stuff
        copySafeFiles "$SRC" "$targetDir"
        convertCopyFiles "$SRC" "$targetDir" "$TEMP"
        rm -rf "${TEMP:?}"/*

        # unpack and process archives
        unpackZip "$SRC" "$targetDir" "$TEMP"
    fi
    PARTCOUNT=$((PARTCOUNT + 1))
done

#cleanup
# Remove exactly the two work directories; a trailing glob ("${TEMP}*")
# would also delete any destination entry whose name starts with "temp".
rm -rf -- "${TEMP:?}" "${ZIPTEMP:?}"
sync
# The source may not be mounted (no partitions, or the last mount failed).
umount "$SRC" 2> /dev/null || true
umount "$DST"
/sbin/shutdown -h now

View File

@ -1,85 +1,42 @@
#!/bin/bash
set -e
set -x
#Constraints
DEV_SRC='/dev/sdf'
DEV_DST='/dev/sdg1'
HOME=testing
clean(){
echo Cleaning.
}
trap clean EXIT TERM INT
# groom da kitteh!
GH=/opt/groomer/
JAVA=/usr/bin/java
if [ ! -b ${DEV_SRC} ]; then
echo 'Source device ('${DEV_SRC}') does not exists.'
exit
fi
pdfCopyDirty()
{
# copy all pdf's over to their relative same locations
find $1 -iname "*.pdf" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${2}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${2}$X/$F \n' | while read l; do eval $l; done
# extract all the txt we can from potentially evil pdf's
find $2 -iname "*.pdf" -printf 'echo %p extracting text to %p-extracted.txt; $JAVA -jar $GH/pdfbox-app-1.7.1.jar ExtractText %p %p-extracted.txt 2> /dev/null \n' | while read l; do eval $l; done
}
DEV_PARTITIONS=`ls ${DEV_SRC}* | grep ${DEV_SRC}'[1-9][0-6]*' || true`
if [ -z ${DEV_PARTITIONS} ]; then
echo ${DEV_SRC} 'does not have any partitions.'
exit
fi
pdfCopyClean()
{
# convert pdf's on the fly from src to relative dst location
find $1 -iname "*.pdf" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${2}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; echo "%p" extracting text to ${2}$X/$F-extracted.txt; $JAVA -jar $GH/pdfbox-app-1.7.1.jar ExtractText "%p" ${2}$X/$F-extracted.txt 2> /dev/null \n' | while read l; do eval $l; done
}
if [ ! -b ${DEV_DST} ]; then
echo 'Destination device ('${DEV_DST}') does not exists.'
exit
fi
copySafeFiles()
{
TYPES="\
jpg jpeg gif png tif tga raw \
mp4 avi mov \
mp3 wav \
txt xml csv tsv \
"
for type in $TYPES
do
find $1 -iname "*.$type" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${2}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${2}$X/$F \n' | while read l; do eval $l; done
done
}
convertCopyFiles()
{
# wordy documents
TYPES="doc docx odt sxw rtf wpd htm html"
FILTER=Text; OUT=txt
convertCopyFilesHelper $1 $2 $3 $TYPES $OUT $FILTER
SRC=${HOME}/src
DST=${HOME}/dst
# spreadsheets
TYPES="xls xslx ods sxc"
FILTER=calc_pdf_Export; OUT=pdf
convertCopyFilesHelper $1 $2 $3 $TYPES $OUT $FILTER
# presentation files
TYPES="ppt pptx odp sxi"
FILTER=impress_pdf_Export; OUT=pdf
convertCopyFilesHelper $1 $2 $3 $TYPES $OUT $FILTER
}
convertCopyFilesHelper()
{
for type in $TYPES
do
find $1 -iname "*.$type" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${3}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${3}$X/$F \n' | while read l; do eval $l; done
find $3 -iname "*.$type" -printf 'X=`echo %h | sed s:${3}::`; mkdir -p ${2}${X}; soffice --headless --convert-to ${type}-extraced.$OUT:$FILTER %p --outdir ${2}${X} \n' | while read l; do eval $l; done
done
}
unpackZip()
{
find $1 -iname "*.zip" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${3}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${3}$X/$F \n' | while read l; do eval $l; done
find $3 -iname "*.zip" -printf 'X=`echo %h | sed s:${3}::`; mkdir -p ${ZIPTEMP}/${X}/UNZIPPED_%f/; unzip "%p" -d ${ZIPTEMP}${X}/UNZIPPED_%f/ 2> /dev/null; \n' | while read l; do eval $l; done
find $3 -iname "*.zip" -printf 'rm -rf %p \n' | while read l; do eval $l; done
if [ -d ${ZIPTEMP} ]; then
if [ $COPYDIRTYPDF -eq 1 ]; then
pdfCopyDirty $ZIPTEMP $targetDir
else
pdfCopyClean $ZIPTEMP $targetDir
fi
copySafeFiles $ZIPTEMP $2 $3
convertCopyFiles $ZIPTEMP $2 $3
rm -rf ${TEMP}/*
rm -rf ${ZIPTEMP}/*
fi
}
SRC=/src
DST=/dst
if [ ! -d $SRC ]; then
mkdir $SRC
fi
@ -87,17 +44,21 @@ if [ ! -d $DST ]; then
mkdir $DST
fi
TEMP=/dst/temp
ZIPTEMP=/dst/ziptemp
if mount|grep $DST; then
umount $DST || true
fi
TEMP=${DST}/temp
ZIPTEMP=${DST}/ziptemp
FL=${DST}/filelist.txt
umount $DST 2> /dev/null
mount /dev/sdb1 $DST
mount ${DEV_DST} $DST
if [ $? -ne 0 ]; then
# echo Could not mount target USB stick!
echo Unable to mount ${DEV_DST} on $DST
exit 1
else
echo Target USB device mounted at $DST
echo 'Target USB device ('${DEV_DST}') mounted at $DST'
rm -rf $DST/FROM_PARTITION_*
# mount temp and make sure it's empty
@ -112,38 +73,42 @@ fi
COPYDIRTYPDF=0
PARTCOUNT=1
PARTITIONS=`ls /dev/sda* | grep '/dev/sda[1-9][0-6]*'`
for partition in $PARTITIONS
for partition in $DEV_PARTITIONS
do
echo Processing partition: ${PARTCOUNT} $partition
umount $SRC 2> /dev/null
echo Processing partition: ${partition}
if mount|grep $SRC; then
umount $SRC 2> /dev/null
fi
mount -r $partition $SRC
if [ $? -ne 0 ]; then
echo could not mount $partition at /$SRC
echo Unable to mount ${partition} on $SRC
else
echo $partition mounted at $SRC
echo $partition mounted at $SRC
echo PARTITION $PARTCOUNT >> $FL
find $SRC/* -printf 'echo %p | sed s:$SRC:: >> $FL \n' | while read l; do eval $l; done
echo PARTITION $PARTCOUNT >> $FL
# FIXME: eval probably insecure
find ${SRC}/* -printf 'echo "%p" | sed s:'${SRC}':: >> '${FL}' \n' | \
while read l; do eval $l; done
# create a director on sdb named PARTION_n
targetDir=${DST}/FROM_PARTITION_${PARTCOUNT}
echo copying to: $targetDir
mkdir -p $targetDir
# create a director on sdb named PARTION_n
targetDir=${DST}/FROM_PARTITION_${PARTCOUNT}
echo copying to: $targetDir
mkdir -p $targetDir
if [ $COPYDIRTYPDF -eq 1 ]; then
pdfCopyDirty $SRC $targetDir
else
pdfCopyClean $SRC $targetDir
fi
#if [ $COPYDIRTYPDF -eq 1 ]; then
# pdfCopyDirty $SRC $targetDir
#else
# pdfCopyClean $SRC $targetDir
#fi
# copy stuff
copySafeFiles $SRC $targetDir
convertCopyFiles $SRC $targetDir $TEMP
rm -rf ${TEMP}/*
# copy stuff
#copySafeFiles $SRC $targetDir
#convertCopyFiles $SRC $targetDir $TEMP
#rm -rf ${TEMP}/*
# unpack and process archives
unpackZip $SRC $targetDir $TEMP
# unpack and process archives
#unpackZip $SRC $targetDir $TEMP
fi
let PARTCOUNT=$PARTCOUNT+1
done
@ -155,5 +120,5 @@ sync
umount $SRC
umount $DST
/sbin/shutdown -h now
#/sbin/shutdown -h now