#!/bin/sh

# pdfcrush -- Optimize the use of the fonts in a PDF file by using ps2pdfwr.
# Copyright (C) 2005-2024 Vincent Lefevre
#
# Warning! pdfcrush overwrites the files given in argument, and
# in some cases, the result can be bigger than the original PDF
# file (and sometimes look different).
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
# Usage: pdfcrush <file.pdf> ...
#   Each argument is a PDF file, which is replaced by its processed version.
#   Arguments that "file" does not identify as PDF documents are skipped.
#
# Exit status:
#   0  every argument was processed;
#   1  usage error, or unusable Ghostscript version;
#   2  at least one argument was skipped (not a PDF file);
#   any other failure aborts the script at once (set -e) with the status
#   of the failing command.
#
# History:
# 2024-09-01: Updated copyright notice.
# 2024-03-20: Added a comment about Ghostscript's AutoRotatePages option.
# 2023-11-24:
#   * Abort with some Ghostscript versions (ToUnicode CMap issue).
#   * Use ps2pdfwr (for the latest PDF version) rather than ps2pdf
#     (which just executes "ps2pdfwr -dCompatibilityLevel=1.4").
# 2022-12-04: Use the -dAutoRotatePages=/None gs option with ps2pdf.
# 2021-01-17: Bug fix: do not restore the PageMedia data.
# 2011-04-30: Added support for pdftk 1.44 and later.
# 2011-02-21: Space normalization.
# 2010-03-04: Decode more entities.
# 2010-01-12: Bug fix.
# 2009-10-25: No longer test the pdftk version (not reliable).
# 2009-10-02:
#   * Decode entities (for non-ASCII characters).
#   * Output version information.
# 2009-09-25:
#   * Secure temporary files.
#   * CreationDate and Creator restored if pdftk 1.41+ is available.
#   * Make sure that no data are lost.
#   * Cleanup of temporary files in case of error.
# 2007-05-17:
#   * No longer convert into PS as an intermediate format.
#   * The input file is replaced by the output.
#   * New usage: each argument is a PDF file to process.
# 2005-12-14: Initial version.

# Version banner, derived from the version-control keyword: the leading
# "$Id: " and everything from the "Z " of the timestamp are removed.
printf '%s\n' \
  'This is $Id: pdfcrush 171414 2024-09-01 12:33:49Z vinc17/qaa $' |
  sed -e 's/.Id: //;s/Z .*//'

if [ $# -eq 0 ]; then
  echo "Usage: pdfcrush <file.pdf> ..." >&2
  exit 1
fi

# From now on, any unexpected command failure aborts the script, so that
# an input file is never replaced by the result of a failed step.
set -e

# Final exit status; set to 2 as soon as one argument is skipped.
err=0

# Refuse the Ghostscript versions listed below (see the message).
gsv=$(gs --version)
case $gsv in
  10.00.0 | 10.02.0)
    printf >&2 "%s\n" \
      "Ghostscript $gsv may regenerate an incorrect ToUnicode CMap:" \
      "  https://bugs.ghostscript.com/show_bug.cgi?id=707237" \
      "It must not be used. Aborting."
    exit 1
    ;;
esac

# Directory for temporary files. Note: to avoid data loss in case of
# problem (e.g. reboot at the wrong time), this directory should not
# be relative to /tmp; so, let's use /var/tmp (the current directory
# may be a bad idea and may not be writable).
tmpdir=$(mktemp -d /var/tmp/pdfcrush-XXXXXXXX)
trap 'rm -rf "$tmpdir"' EXIT
tmpdump="$tmpdir/dump"        # raw pdftk dump of the input file
tmpinfo="$tmpdir/info"        # metadata to put back into the output
tmppdf1="$tmpdir/crushed.pdf" # output of ps2pdfwr
tmppdf2="$tmpdir/final.pdf"   # output with metadata, moved over the input

# Info keys restored when pdftk has no *_utf8 operations.
keys='Creator|CreationDate|Title|Subject|Keywords|Author'

# pdftk will be used to restore the metadata, if available.
# Warning! The official pdftk 1.41 version is broken; you may
# need the handle_utf8_data_in_update_info patch from Debian.
# An empty value means that pdftk could not be run.
pdftkv=$(pdftk --version 2> /dev/null || true)

for i in "$@"; do
  # Make sure that a name starting with "-" is not taken as an option
  # by the external tools; $i itself is kept for the messages.
  case $i in
    -*) src=./$i ;;
    *) src=$i ;;
  esac

  # -b: do not output the file name, which could itself contain the
  # string "PDF document" and yield a false positive.
  if ! file -b "$src" | grep -q 'PDF document'; then
    printf "Skipping %s (not a PDF file)\n" "$i" >&2
    err=2
    continue
  fi

  if [ -n "$pdftkv" ]; then
    printf "Getting metadata of file %s\n" "$i"
    if pdftk --help | grep -q dump_data_utf8; then
      # Dump to a file first, so that a pdftk failure aborts the script
      # instead of being hidden by a pipeline.
      pdftk "$src" dump_data_utf8 > "$tmpdump"
      # Do not restore the PageMedia data (see History, 2021-01-17).
      # NOTE(review): this part of the script was damaged in the copy
      # under review; the way PageMedia lines are dropped here is a
      # reconstruction -- confirm against the upstream script.
      sed '/^PageMedia/d' "$tmpdump" > "$tmpinfo"
      update=update_info_utf8
    else
      pdftk "$src" dump_data > "$tmpdump"
      # Keep only the wanted InfoKey/InfoValue pairs and decode the
      # entities of the values; -CO makes perl write UTF-8 on stdout.
      # "&amp;" is decoded last to avoid decoding a value twice.
      # NOTE(review): the "&quot;" and "&amp;" substitutions and the
      # final "print" are reconstructed (that text was missing from the
      # copy under review) -- confirm against the upstream script.
      perl -CO -ne \
        "/^InfoKey: ($keys)\$/ or next; print; \$_ = <>;
         /^InfoValue: / or die; s/&#(\\d+);/chr\$1/eg;
         s/&gt;/>/g; s/&lt;/</g; s/&quot;/\"/g; s/&amp;/&/g; print" \
        < "$tmpdump" > "$tmpinfo"
      update=update_info
    fi
  fi

  # Regenerate the PDF file. -dAutoRotatePages=/None is passed to
  # Ghostscript so that it does not automatically rotate pages.
  # NOTE(review): this invocation is reconstructed from the History
  # entries (2022-12-04, 2023-11-24); the original line and its comment
  # were missing from the copy under review -- confirm against upstream.
  ps2pdfwr -dAutoRotatePages=/None "$src" "$tmppdf1"

  if [ -n "$pdftkv" ]; then
    # Add a ModDate entry with the current date, in the form
    # D:YYYYMMDDhhmmss+hh'mm' (sed rewrites the %z offset +hhmm).
    printf >> "$tmpinfo" "InfoKey: ModDate\nInfoValue: D:%s\n" \
      "$(date +%Y%m%d%H%M%S%z | sed "s/\([-+]..\)\(..\)/\1'\2'/")"
    pdftk "$tmppdf1" "$update" "$tmpinfo" output "$tmppdf2"
  else
    echo "pdftk not found; metadata not modified."
    mv "$tmppdf1" "$tmppdf2"
  fi

  # While the input file is being replaced, keep the temporary directory
  # on failure: it then contains the only complete copy of the result.
  trap 'if [ $? = 0 ]; then rm -rf "$tmpdir";
        else echo "Backup in $tmpdir"; fi' EXIT
  mv -f "$tmppdf2" "$src"
  trap 'rm -rf "$tmpdir"' EXIT

  printf "Successfully processed file %s\n" "$i"
done

exit "$err"