#!/bin/sh
# pdfcrush -- Optimize the use of the fonts in a PDF file by using ps2pdfwr.
# Copyright (C) 2005-2024 Vincent Lefevre
#
# Warning! pdfcrush overwrites the files given as arguments, and
# in some cases, the result can be bigger than the original PDF
# file (and may sometimes look different).
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
# History:
# 2024-09-01: Updated copyright notice.
# 2024-03-20: Added a comment about Ghostscript's AutoRotatePages option.
# 2023-11-24:
# * Abort with some Ghostscript versions (ToUnicode CMap issue).
# * Use ps2pdfwr (for the latest PDF version) rather than ps2pdf
# (which just executes "ps2pdfwr -dCompatibilityLevel=1.4").
# 2022-12-04: Use the -dAutoRotatePages=/None gs option with ps2pdf.
# 2021-01-17: Bug fix: do not restore the PageMedia data.
# 2011-04-30: Added support for pdftk 1.44 and later.
# 2011-02-21: Space normalization.
# 2010-03-04: Decode more entities.
# 2010-01-12: Bug fix.
# 2009-10-25: No longer test the pdftk version (not reliable).
# 2009-10-02:
# * Decode entities (for non-ASCII characters).
# * Output version information.
# 2009-09-25:
# * Secure temporary files.
# * CreationDate and Creator restored if pdftk 1.41+ is available.
# * Make sure that no data are lost.
# * Cleanup of temporary files in case of error.
# 2007-05-17:
# * No longer convert into PS as an intermediate format.
# * The input file is replaced by the output.
# * New usage: each argument is a PDF file to process.
# 2005-12-14: Initial version.
# Print a one-line banner with the script name, revision and timestamp.
# The first sed expression drops the keyword marker in front of the
# name; the second one drops everything from the "Z " that ends the
# timestamp (i.e. the trailing committer information).
printf '%s\n' 'This is $Id: pdfcrush 171414 2024-09-01 12:33:49Z vinc17/qaa $' |
  sed -e 's/.Id: //' -e 's/Z .*//'
# Each argument is a PDF file to process, so at least one is required.
# Without any argument, print the synopsis on stderr and fail.
if [ "$#" -eq 0 ]; then
  printf '%s\n' "Usage: pdfcrush <file.pdf> ..." >&2
  exit 1
fi
# Exit status returned at the very end of the script; it is set to 2
# when an argument is skipped because it is not a PDF file.
err=0
# From now on, stop at the first command that fails.
set -e
# Refuse to run with the Ghostscript releases listed below, which are
# affected by the ToUnicode CMap bug referenced in the message.
gsv=$(gs --version)
case $gsv in
  10.00.0 | 10.02.0)
    printf '%s\n' >&2 \
      "Ghostscript $gsv may regenerate an incorrect ToUnicode CMap:" \
      " https://bugs.ghostscript.com/show_bug.cgi?id=707237" \
      "It must not be used. Aborting."
    exit 1
    ;;
esac
# Directory for temporary files. Note: to avoid data loss in case of
# problem (e.g. reboot at the wrong time), this directory should not
# be relative to /tmp; so, let's use /var/tmp (the current directory
# may be a bad idea and may not be writable).
tmpdir=$(mktemp -d /var/tmp/pdfcrush-XXXXXXXX) || exit
# Remove the temporary directory when the script exits. The variable is
# expanded (quoted) when the trap runs, not when it is installed.
trap 'rm -rf -- "$tmpdir"' 0
# Some shells do not run the exit trap when they are killed by a signal.
# Turn the usual termination signals into a regular exit (with the
# conventional 128+signal status) so that whatever exit trap is in
# force at that time is always executed.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM
# Files used inside the temporary directory:
#   tmpinfo: metadata dumped by pdftk
#   tmppdf1: target of the "mv" and input of pdftk when rebuilding
#   tmppdf2: final file, moved over the original PDF
tmpinfo="$tmpdir/info"
tmppdf1="$tmpdir/crushed.pdf"
tmppdf2="$tmpdir/final.pdf"
# Info keys kept when filtering the pdftk dump (alternation for a regex).
keys='Creator|CreationDate|Title|Subject|Keywords|Author'
# pdftk will be used to restore the metadata, if available.
# Warning! The official pdftk 1.41 version is broken; you may
# need the handle_utf8_data_in_update_info patch from Debian.
# An empty $pdftkv means that pdftk could not be run; the "|| true"
# keeps "set -e" from aborting the script in that case.
pdftkv=$(pdftk --version 2> /dev/null || true)
# Main loop: handle each command-line argument in turn.
#
# For an argument that file(1) reports as a PDF document:
#   1. If pdftk is usable ($pdftkv non-empty), its metadata dump is
#      saved in $tmpinfo and $update is set to the matching pdftk
#      update operation.
#   2. A ModDate entry holding the current date is appended to $tmpinfo
#      (the sed call rewrites the numeric time zone, e.g. +0200 becomes
#      +02'00'), and pdftk writes $tmppdf2 from $tmppdf1 with that
#      information. Without pdftk, a message is printed and $tmppdf1 is
#      simply renamed to $tmppdf2.
#   3. $tmppdf2 replaces the original file with "mv -f". Around this
#      move only, the exit trap is changed so that, if the script exits
#      with a non-zero status, $tmpdir is kept and its location is
#      printed ("Backup in ..."); the plain cleanup trap is restored
#      once the move has succeeded.
# Any other argument is reported on stderr and err is set to 2, which
# becomes the exit status of the script ("exit $err" after the loop).
#
# NOTE(review): the command that creates $tmppdf1 is not visible in
# this copy of the script -- presumably the ps2pdfwr call mentioned in
# the history; confirm against the upstream file.
for i in "$@"
do
# Only process what file(1) identifies as a PDF document.
if file "$i" | grep -q 'PDF document'; then
# Save the metadata only if pdftk is available.
if [ -n "$pdftkv" ]; then
printf "Getting metadata of file %s\n" "$i"
# Use the UTF-8 dump/update operations when this pdftk lists them in
# its help text.
if pdftk --help | grep -q dump_data_utf8; then
pdftk "$i" dump_data_utf8 > "$tmpinfo"
update=update_info_utf8
else
# Fallback for a pdftk without the UTF-8 operations: the dump is
# filtered by perl, which keeps only the InfoKey lines matching $keys
# together with the InfoValue line that follows each of them (it dies
# if that line is missing), and rewrites parts of the value.
# NOTE(review): the perl program below looks truncated (text between
# "<" and ">" seems to have been lost): the quoted program is not
# closed, $update is not set in this branch, and the closing "fi"
# lines are missing. Restore these lines from the upstream file.
pdftk "$i" dump_data | perl > "$tmpinfo" -CO -ne \
"/^InfoKey: ($keys)\$/ or next; print; \$_ = <>;
/^InfoValue: / or die; s/(\\d+);/chr\$1/eg;
s/>/>/g; s/</> "$tmpinfo" "InfoKey: ModDate\nInfoValue: D:%s\n" \
$(date +%Y%m%d%H%M%S%z | sed "s/\([-+]..\)\(..\)/\1'\2'/")
pdftk "$tmppdf1" $update "$tmpinfo" output "$tmppdf2"
else
echo "pdftk not found; metadata not modified."
mv "$tmppdf1" "$tmppdf2"
fi
trap 'if [ $? = 0 ]; then rm -rf $tmpdir; \
else echo "Backup in $tmpdir"; fi' 0
mv -f "$tmppdf2" "$i"
trap 'rm -rf $tmpdir' 0
printf "Successfully processed file %s\n" "$i"
else
printf "Skipping %s (not a PDF file)\n" "$i" >&2
err=2
fi
done
exit $err