diff --git a/TODO b/TODO index b605c01..26c031c 100644 --- a/TODO +++ b/TODO @@ -23,3 +23,4 @@ TODO * scripts to generate a SD card automatically (win/mac/linux) * move the scripts away from /opt/ * strip back libreoffice to minimum required packages. in particular, if possible, remove libreoffice-java-common package +* Write the groomer log on the destination key diff --git a/filesystem/opt/groomer/g_function.sh b/filesystem/opt/groomer/g_function.sh new file mode 100755 index 0000000..73d18ed --- /dev/null +++ b/filesystem/opt/groomer/g_function.sh @@ -0,0 +1,166 @@ +#!/bin/bash + +set -e +set -x + +# groom da kitteh! + +SRC='/dev/sdb' +PARTITIONS=`ls '${SRC}' | grep '${SRC}[1-9][0-6]*'` +DST='/dev/sdc1' + +GH=/opt/groomer/ +JAVA=/usr/bin/java + +pdfCopyDirty() +{ + # copy all pdf's over to their relative same locations + find $1 -iname "*.pdf" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${2}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${2}$X/$F \n' | while read l; do eval $l; done + # extract all the txt we can from potentially evil pdf's + find $2 -iname "*.pdf" -printf 'echo %p extracting text to %p-extracted.txt; $JAVA -jar $GH/pdfbox-app-1.7.1.jar ExtractText %p %p-extracted.txt 2> /dev/null \n' | while read l; do eval $l; done +} + +pdfCopyClean() +{ + # convert pdf's on the fly from src to relative dst location + find $1 -iname "*.pdf" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${2}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; echo "%p" extracting text to ${2}$X/$F-extracted.txt; $JAVA -jar $GH/pdfbox-app-1.7.1.jar ExtractText "%p" ${2}$X/$F-extracted.txt 2> /dev/null \n' | while read l; do eval $l; done +} + +copySafeFiles() +{ + TYPES="\ + jpg jpeg gif png tif tga raw \ + mp4 avi mov \ + mp3 wav \ + txt xml csv tsv \ + " + for type in $TYPES + do + find $1 -iname "*.$type" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${2}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${2}$X/$F \n' | while read l; do eval $l; done + done +} + +convertCopyFiles() +{ + # wordy documents + TYPES="doc docx odt sxw rtf wpd htm html" + FILTER=Text; OUT=txt + convertCopyFilesHelper $1 $2 $3 $TYPES $OUT $FILTER + + # spreadsheets + TYPES="xls xslx ods sxc" + FILTER=calc_pdf_Export; OUT=pdf + convertCopyFilesHelper $1 $2 $3 $TYPES $OUT $FILTER + + # presentation files + TYPES="ppt pptx odp sxi" + FILTER=impress_pdf_Export; OUT=pdf + convertCopyFilesHelper $1 $2 $3 $TYPES $OUT $FILTER +} +convertCopyFilesHelper() +{ + for type in $TYPES + do + find $1 -iname "*.$type" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${3}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${3}$X/$F \n' | while read l; do eval $l; done + find $3 -iname "*.$type" -printf 'X=`echo %h | sed s:${3}::`; mkdir -p ${2}${X}; soffice --headless --convert-to ${type}-extraced.$OUT:$FILTER %p --outdir ${2}${X} \n' | while read l; do eval $l; done + done +} + +unpackZip() +{ + find $1 -iname "*.zip" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${3}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${3}$X/$F \n' | while read l; do eval $l; done + find $3 -iname "*.zip" -printf 'X=`echo %h | sed s:${3}::`; mkdir -p ${ZIPTEMP}/${X}/UNZIPPED_%f/; unzip "%p" -d ${ZIPTEMP}${X}/UNZIPPED_%f/ 2> /dev/null; \n' | while read l; do eval $l; done + find $3 -iname "*.zip" -printf 'rm -rf %p \n' | while read l; do eval $l; done + + if [ -d ${ZIPTEMP} ]; then + if [ $COPYDIRTYPDF -eq 1 ]; then + pdfCopyDirty $ZIPTEMP $targetDir + else + pdfCopyClean $ZIPTEMP $targetDir + fi + copySafeFiles $ZIPTEMP $2 $3 + convertCopyFiles $ZIPTEMP $2 $3 + rm -rf ${TEMP}/* + rm -rf ${ZIPTEMP}/* + fi +} + +SRC=/src +DST=/dst +if [ ! -d $SRC ]; then + mkdir $SRC +fi +if [ ! -d $DST ]; then + mkdir $DST +fi + +TEMP=/dst/temp +ZIPTEMP=/dst/ziptemp +FL=${DST}/filelist.txt + +umount $DST 2> /dev/null +mount /dev/sdb1 $DST +if [ $? -ne 0 ]; then +# echo Could not mount target USB stick! + exit 1 +else + echo Target USB device mounted at $DST + rm -rf $DST/FROM_PARTITION_* + + # mount temp and make sure it's empty + mkdir -p $TEMP + mkdir -p $ZIPTEMP + + rm -rf ${TEMP}/* + rm -rf ${ZIPTEMP}/* + + echo Full file list from source USB > $FL +fi + +COPYDIRTYPDF=0 +PARTCOUNT=1 +PARTITIONS=`ls /dev/sda* | grep '/dev/sda[1-9][0-6]*'` +for partition in $PARTITIONS +do + echo Processing partition: ${PARTCOUNT} $partition + umount $SRC 2> /dev/null + mount -r $partition $SRC + if [ $? -ne 0 ]; then + echo could not mount $partition at /$SRC + else + echo $partition mounted at $SRC + + echo PARTITION $PARTCOUNT >> $FL + find $SRC/* -printf 'echo %p | sed s:$SRC:: >> $FL \n' | while read l; do eval $l; done + + # create a director on sdb named PARTION_n + targetDir=${DST}/FROM_PARTITION_${PARTCOUNT} + echo copying to: $targetDir + mkdir -p $targetDir + + if [ $COPYDIRTYPDF -eq 1 ]; then + pdfCopyDirty $SRC $targetDir + else + pdfCopyClean $SRC $targetDir + fi + + # copy stuff + copySafeFiles $SRC $targetDir + convertCopyFiles $SRC $targetDir $TEMP + rm -rf ${TEMP}/* + + # unpack and process archives + unpackZip $SRC $targetDir $TEMP + fi + let PARTCOUNT=$PARTCOUNT+1 +done + +#cleanup +rm -rf ${TEMP}* +rm -rf ${ZIPTEMP}* +sync +umount $SRC +umount $DST + +/sbin/shutdown -h now + diff --git a/filesystem/opt/groomer/groomer.sh b/filesystem/opt/groomer/groomer.sh index d58fbb7..44880f3 100755 --- a/filesystem/opt/groomer/groomer.sh +++ b/filesystem/opt/groomer/groomer.sh @@ -1,85 +1,42 @@ #!/bin/bash +set -e +set -x + +#Constraints +DEV_SRC='/dev/sdf' +DEV_DST='/dev/sdg1' +HOME=testing + + +clean(){ + echo Cleaning. +} + +trap clean EXIT TERM INT + # groom da kitteh! -GH=/opt/groomer/ -JAVA=/usr/bin/java +if [ ! -b ${DEV_SRC} ]; then + echo 'Source device ('${DEV_SRC}') does not exists.' + exit +fi -pdfCopyDirty() -{ - # copy all pdf's over to their relative same locations - find $1 -iname "*.pdf" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${2}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${2}$X/$F \n' | while read l; do eval $l; done - # extract all the txt we can from potentially evil pdf's - find $2 -iname "*.pdf" -printf 'echo %p extracting text to %p-extracted.txt; $JAVA -jar $GH/pdfbox-app-1.7.1.jar ExtractText %p %p-extracted.txt 2> /dev/null \n' | while read l; do eval $l; done -} +DEV_PARTITIONS=`ls ${DEV_SRC}* | grep ${DEV_SRC}'[1-9][0-6]*' || true` +if [ -z ${DEV_PARTITIONS} ]; then + echo ${DEV_SRC} 'does not have any partitions.' + exit +fi -pdfCopyClean() -{ - # convert pdf's on the fly from src to relative dst location - find $1 -iname "*.pdf" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${2}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; echo "%p" extracting text to ${2}$X/$F-extracted.txt; $JAVA -jar $GH/pdfbox-app-1.7.1.jar ExtractText "%p" ${2}$X/$F-extracted.txt 2> /dev/null \n' | while read l; do eval $l; done -} +if [ ! -b ${DEV_DST} ]; then + echo 'Destination device ('${DEV_DST}') does not exists.' + exit +fi -copySafeFiles() -{ - TYPES="\ - jpg jpeg gif png tif tga raw \ - mp4 avi mov \ - mp3 wav \ - txt xml csv tsv \ - " - for type in $TYPES - do - find $1 -iname "*.$type" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${2}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${2}$X/$F \n' | while read l; do eval $l; done - done -} -convertCopyFiles() -{ - # wordy documents - TYPES="doc docx odt sxw rtf wpd htm html" - FILTER=Text; OUT=txt - convertCopyFilesHelper $1 $2 $3 $TYPES $OUT $FILTER +SRC=${HOME}/src +DST=${HOME}/dst - # spreadsheets - TYPES="xls xslx ods sxc" - FILTER=calc_pdf_Export; OUT=pdf - convertCopyFilesHelper $1 $2 $3 $TYPES $OUT $FILTER - - # presentation files - TYPES="ppt pptx odp sxi" - FILTER=impress_pdf_Export; OUT=pdf - convertCopyFilesHelper $1 $2 $3 $TYPES $OUT $FILTER -} -convertCopyFilesHelper() -{ - for type in $TYPES - do - find $1 -iname "*.$type" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${3}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${3}$X/$F \n' | while read l; do eval $l; done - find $3 -iname "*.$type" -printf 'X=`echo %h | sed s:${3}::`; mkdir -p ${2}${X}; soffice --headless --convert-to ${type}-extraced.$OUT:$FILTER %p --outdir ${2}${X} \n' | while read l; do eval $l; done - done -} - -unpackZip() -{ - find $1 -iname "*.zip" -printf 'X=`echo %h | sed -f $GH/sedKillSpace -e s:${1}::`; mkdir -p ${3}${X}; F=`echo %f | sed -f $GH/sedKillSpace`; cp -fv "%p" ${3}$X/$F \n' | while read l; do eval $l; done - find $3 -iname "*.zip" -printf 'X=`echo %h | sed s:${3}::`; mkdir -p ${ZIPTEMP}/${X}/UNZIPPED_%f/; unzip "%p" -d ${ZIPTEMP}${X}/UNZIPPED_%f/ 2> /dev/null; \n' | while read l; do eval $l; done - find $3 -iname "*.zip" -printf 'rm -rf %p \n' | while read l; do eval $l; done - - if [ -d ${ZIPTEMP} ]; then - if [ $COPYDIRTYPDF -eq 1 ]; then - pdfCopyDirty $ZIPTEMP $targetDir - else - pdfCopyClean $ZIPTEMP $targetDir - fi - copySafeFiles $ZIPTEMP $2 $3 - convertCopyFiles $ZIPTEMP $2 $3 - rm -rf ${TEMP}/* - rm -rf ${ZIPTEMP}/* - fi -} - -SRC=/src -DST=/dst if [ ! -d $SRC ]; then mkdir $SRC fi @@ -87,17 +44,21 @@ if [ ! -d $DST ]; then mkdir $DST fi -TEMP=/dst/temp -ZIPTEMP=/dst/ziptemp +if mount|grep $DST; then + umount $DST || true +fi + +TEMP=${DST}/temp +ZIPTEMP=${DST}/ziptemp FL=${DST}/filelist.txt -umount $DST 2> /dev/null -mount /dev/sdb1 $DST +mount ${DEV_DST} $DST + if [ $? -ne 0 ]; then -# echo Could not mount target USB stick! + echo Unable to mount ${DEV_DST} on $DST exit 1 else - echo Target USB device mounted at $DST + echo 'Target USB device ('${DEV_DST}') mounted at $DST' rm -rf $DST/FROM_PARTITION_* # mount temp and make sure it's empty @@ -112,38 +73,42 @@ fi COPYDIRTYPDF=0 PARTCOUNT=1 -PARTITIONS=`ls /dev/sda* | grep '/dev/sda[1-9][0-6]*'` -for partition in $PARTITIONS +for partition in $DEV_PARTITIONS do - echo Processing partition: ${PARTCOUNT} $partition - umount $SRC 2> /dev/null + echo Processing partition: ${partition} + if mount|grep $SRC; then + umount $SRC 2> /dev/null + fi + mount -r $partition $SRC if [ $? -ne 0 ]; then - echo could not mount $partition at /$SRC + echo Unable to mount ${partition} on $SRC else - echo $partition mounted at $SRC + echo $partition mounted at $SRC - echo PARTITION $PARTCOUNT >> $FL - find $SRC/* -printf 'echo %p | sed s:$SRC:: >> $FL \n' | while read l; do eval $l; done + echo PARTITION $PARTCOUNT >> $FL + # FIXME: eval probably insecure + find ${SRC}/* -printf 'echo "%p" | sed s:'${SRC}':: >> '${FL}' \n' | \ + while read l; do eval $l; done - # create a director on sdb named PARTION_n - targetDir=${DST}/FROM_PARTITION_${PARTCOUNT} - echo copying to: $targetDir - mkdir -p $targetDir + # create a director on sdb named PARTION_n + targetDir=${DST}/FROM_PARTITION_${PARTCOUNT} + echo copying to: $targetDir + mkdir -p $targetDir - if [ $COPYDIRTYPDF -eq 1 ]; then - pdfCopyDirty $SRC $targetDir - else - pdfCopyClean $SRC $targetDir - fi + #if [ $COPYDIRTYPDF -eq 1 ]; then + # pdfCopyDirty $SRC $targetDir + #else + # pdfCopyClean $SRC $targetDir + #fi - # copy stuff - copySafeFiles $SRC $targetDir - convertCopyFiles $SRC $targetDir $TEMP - rm -rf ${TEMP}/* + # copy stuff + #copySafeFiles $SRC $targetDir + #convertCopyFiles $SRC $targetDir $TEMP + #rm -rf ${TEMP}/* - # unpack and process archives - unpackZip $SRC $targetDir $TEMP + # unpack and process archives + #unpackZip $SRC $targetDir $TEMP fi let PARTCOUNT=$PARTCOUNT+1 done @@ -155,5 +120,5 @@ sync umount $SRC umount $DST -/sbin/shutdown -h now +#/sbin/shutdown -h now