#!/usr/bin/perl use Text::PDF::File; use Text::PDF::Utils; use Getopt::Std; $version = "1.8"; # MJPH 17-AUG-2009 add -u gutter for -p 2 # $version = "1.7"; # MJPH 29-FEB-2008 add -g -p r, fix contents problem for empty pages # $version = "1.6"; # MJPH 13-JAN-2003 -s2b option supported # $version = "1.505"; # MJPH 3-AUG-2002 -s comma separated. Allow -ve values in -s # merge errors store something, at least! # $version = "1.504"; # MJPH 27-JUN-2002 Use CropBox over MediaBox # $version = "1.503"; # MJPH 19-FEB-2002 Fix -p1 positioning (again!) # $version = "1.502"; # MJPH 18-JUN-2001 Add support for -p4s # $version = "1.501"; # MJPH 2-MAY-2001 Correct positioning of -s;;; type pages # $version = "1.500"; # MJPH 26-JUL-2000 Correct positioning in some cases and add landscape sizes # $version = "1.401"; # MJPH 28-JUN-2000 Debug content lists and -r # $version = "1.4"; # MJPH 1-FEB-2000 Add -l and preset paper sizes # $version = "1.302"; # MJPH 11-DEC-1999 Make sure updated root output # $version = "1.302"; # MJPH 30-NOV-1999 Update to use Text::PDF # $version = "1.300"; # MJPH 15-JUN-1999 Remove outlines, etc. when making booklet # $version = "1.201"; # MJPH 29-DEC-1998 Add test in merge_dict for $v2 not present # $version = "1.200"; # MJPH 6-NOV-1998 Merging external resources and change -r # $version = "1.101"; # MJPH 13-OCT-1998 Debug resource merging not being output # $version = "1.100"; # MJPH 3-AUG-1998 Support new PDF library getopts("b:g:h:lp:qrs:u:"); if (!defined $ARGV[0]) { die <<"EOT"; PDFBKLT [-b num/size] [-l] [-p num] [-g num] [-q] [-r] [-s size] pdffile (c) M. Hosken. Version: $version Converts a PDF file into a booklet. It edits the pdffile to add the modifications at the end. -b num/size Specifies which page contains the output page size details [1] or gives the dimensions of the page in pts (x;y) or A4,ltr,lgl,A5 -g num[;offset] Makes up page groups, or signatures with num pages in each and offset pages in the first signature. num and offset must be multiples of 4 -l Flag to indicate linear scaling -p num Specifies the number of pages on the output page (1, 2, 4) [2] If 4, can be 4s to flip on short edge. Append r if right to left. -q Quiet (no on screen messages) -r Rotates the output (-p 2 rotates automatically, -r rotates back) -s size Specifies the the location of the actual text on a page: 2 half page right and left (big gutter) 2r half page always on right 2l half page always on left 2b dual half pages on right and left assumes last page opposite first 4 1/4 page right and left bottom (very big gutter) 4t 1/4 page right and left top (very big gutter) 4r/4l/4rt/4lt 1/4 page always on right or left bottom or top location as a single string: minx,miny,maxx,maxy in pts -u width Specify the gutter size (for 2 pages per output page only) EOT } %sizes=( 'a4' => '595;842', 'a4l' => '842;595', 'ltr' => '612;792', 'ltrl' => '792;612', 'lgl' => '612;1008', 'lgll' => '1008;612', ); $opt_u = 0 unless defined $opt_u; $opt_b = $sizes{lc($opt_b)} if defined $sizes{lc($opt_b)}; print "Reading file\n" unless $opt_q; $p = Text::PDF::File->open($ARGV[0], 1) || die "Can't open $ARGV[0]"; $r = $p->{'Root'}->realise; $pgs = $r->{'Pages'}->realise; $pgcount = $pgs->{'Count'}->val; foreach (qw(Outlines Dests Threads AcroForm PageLabels StructTreeRoot OpenAction PageMode)) { if (defined $r->{$_}) { delete $r->{$_}; $p->out_obj($r); } } $pgcount = 0; $rtl = ($opt_p =~ s/r$//o); proc_pages($pgs); if ($opt_s =~ m/b$/o) { for ($i = 0; $i < $pgcount; $i++) { my ($pnum) = ($i == 0) ? 0 : 2 * $i - 1; my ($ref) = $pglist[$pnum]->copy($p, undef, 1, $p, 'clip' => ["^/Contents", '^/Resources/[^/]+/.*']); my ($npnum) = ($i == 0) ? -1 : $pnum + 1; $r->{'Pages'}->add_page($ref, $npnum); if ($npnum == -1) { push (@pglist, $ref); $pglist[$npnum]{' pnum'} = $pgcount * 2; } else { splice(@pglist, $npnum, 0, $ref); $pglist[$npnum]{' pnum'} = $npnum; } $ref->{' pnum'} = $pnum; print STDERR '.' unless $opt_q; } $pgcount *= 2; # $p->append_file; # exit(1); } $opt_p = 2 unless defined $opt_p; $opt_r = !$opt_r if (($opt_p == 2 && $opt_s !~ /^2[rltb]*$/oi) || ($opt_p != 2 && $opt_s =~ /^2[rlt]*$/oi)); if ($opt_b =~ m/^([0-9]+)\;([0-9]+)/oi) { @pbox = (0, 0, $1, $2); } else { $opt_b = 1 unless defined $opt_b; $opt_b--; foreach $n ($pglist[$opt_b]->find_prop('MediaBox')->elementsof) { push(@pbox, $n->val); } } $fpgins = PDFDict(); $p->new_obj($fpgins); $spgins = PDFDict(); $p->new_obj($spgins); $fpgins->{' stream'} = "q"; $spgins->{' stream'} = "Q"; unless ($opt_q) { print "\nThere are $pgcount pages\n"; print "Page box (pt) = $pbox[0], $pbox[1], $pbox[2], $pbox[3]\n"; } if ($opt_g) { my ($num, $offset) = split(/[\s;,:]/, $opt_g); my ($i); $offset ||= $num; for ($i = $offset; $i < $pgcount; $i += $num) { push (@jobs, $i); } } push(@jobs, ($opt_p == 1) ? $pgcount : int(($pgcount + 3) / 4) * 4); if ($opt_p > 1) { for ($i = $pgcount; $i < int(($pgcount + 3) /4) * 4; $i++) { if ($rtl) { unshift (@pglist, undef); } else { push (@pglist, undef); } } } for ($jn = 0; $jn < scalar @jobs; $jn++) { my ($first) = $jn ? $jobs[$jn - 1] : 0; my ($last) = $jobs[$jn]; for ($i = 0; $i < ($last - $first) / $opt_p; $i++) { if ($opt_p == 1) { next if $i >= $pgcount; @pl = ($pglist[$i + $first]); $m = $i + 1; } elsif ($opt_p == 2) { @pl = ($pglist[$i + $first], $pglist[$last - $i - 1]); @pl = ($pl[1], $pl[0]) if ($i & 1); $m = ($first + $i + 1) . ", " . ($last - $i); } else # $opt_p == 4 { @pl = ($pglist[2 * $i + $first]); # do these in a special order with increasing difficulty of passing if() requirement push (@pl, $pglist[2 * $i + 1 + $first]) if (2 * $i + 1 + $first < $pgcount); push (@pl, $pglist[$last - 2 * $i - 2]) if ($last - 2 * $i - 2 < $pgcount); push (@pl, $pglist[$last - 2 * $i - 1]) if ($last - 2 * $i <= $pgcount); $m = (2 * $i + 1 + $first) . ", " . (2 * $i + 2 + $first) . ", " . ($last - 2 * $i - 1) . ", " . ($last - 2 * $i); } print "Merging " . $m . "\n" unless $opt_q; merge_pages(@pl); } } $p->append_file; sub proc_pages { my ($pgs) = @_; my ($pgref); foreach $pgref ($pgs->{'Kids'}->elementsof) { print STDERR "." unless $opt_q; $pgref->realise; if ($pgref->{'Type'}->val =~ m/^Pages$/oi) { proc_pages($pgref); } else { if ($rtl) { unshift (@pglist, $pgref); } else { push (@pglist, $pgref); } $pgref->{' pnum'} = $pgcount++; } } } sub merge_pages { my (@pr) = @_; my ($s, $j, $bp); $s = undef; for ($j = 0; $j <= $#pr; $j++) { my ($n, $is, $rl, $bt, $xs, $ys, $scale, $scalestr, $scalestrr, $id, $i); my (@slist, @s, $s1, $s2, $p2, $min, $k); next unless defined $pr[$j]; @prbox = (); @clipbox = (); foreach $n (($pr[$j]->find_prop('CropBox') || $pr[$j]->find_prop('MediaBox'))->elementsof) { push(@prbox, $n->val); } $is = 1; if ($opt_s =~ m/^(-?[0-9]+),(-?[0-9]+),(-?[0-9]+),(-?[0-9]+)/o) { @prbox = ($1, $2, $3, $4); } elsif ($opt_s =~ m/^([0-9])(.?)(.?)$/o) { $is = $1; $rl = lc($2); $bt = lc($3); # $rl = ($pr[$j]->{' pnum'} & 1) ? "l" : "r" unless ($rl =~ m/[rl]/o); $rl = ($j & 1) ? "l" : "r" unless ($rl =~ m/[rl]/o); if ($rl eq "r") { $prbox[1] = $prbox[3] - (($prbox[3] - $prbox[1]) / $is); } elsif ($rl eq "l") { $prbox[3] = $prbox[1] + (($prbox[3] - $prbox[1]) / $is); } if ($bt eq "t") { $prbox[2] = $prbox[0] + (($prbox[2] - $prbox[0]) * 2 / $is); } elsif ($is == 4) { $prbox[0] = $prbox[2] - (($prbox[2] - $prbox[0]) * 2 / $is); } elsif ($opt_s =~ m/b$/o) { @clipbox = ($prbox[0], $prbox[1], $prbox[2] - $prbox[0], $prbox[3] - $prbox[1]); } } elsif ($opt_s) { die "Illegal -s value of $opt_s"; } $id = join(',', @prbox) . ",$opt_p"; if (!defined $scache{$id}) { @slist = (); $xs = ($pbox[2] - $pbox[0]) / ($prbox[2] - $prbox[0]); # $ys = ($pbox[3] - $pbox[1]) / ($prbox[3] - $prbox[1]); $ys = ($pbox[3] - $pbox[1] - $opt_u*2) / ($prbox[3] - $prbox[1]); $scale = ($prbox[3] - $prbox[1]) / ($prbox[2] - $prbox[0]); $rot = (($opt_p == 1 || $opt_p == 4) && $is == 2) || ($opt_p == 2 && $is != 2); if ($opt_l) { if ($xs < ($opt_p == 2 ? .5 : 1) * ($opt_r ? $scale * $scale : 1) * $ys) { $ys = $xs / ($opt_r ? $scale * $scale : 1) / ($opt_p == 2 ? .5 : 1); } else { $xs = $ys * ($opt_r ? $scale * $scale : 1) * ($opt_p == 2 ? .5 : 1); } } if ($opt_p == 1 && $is != 2) # portrait to portrait { $slist[0] = cm($xs, 0, 0, $ys, # $pbox[0] - ($xs * $prbox[0]), $pbox[1] - ($ys * $prbox[1])); .5 * ($pbox[2] + $pbox[0] - $xs * ($prbox[2] + $prbox[0])), .5 * ($pbox[3] + $pbox[1] - $ys * ($prbox[3] + $prbox[1])), @clipbox); } elsif ($opt_p == 1) # landscape on portrait to portrait { $slist[0] = cm(0, -$scale * $ys, $xs / $scale, 0, $pbox[0] - $xs * $prbox[1] / $scale, $pbox[1] + $scale * $ys * $prbox[2], @clipbox); } elsif ($opt_p == 2 && $is != 2) # portrait source on portrait { @scalestr = (0, 0.5 * $ys * $scale, -$xs / $scale, 0, 0.5 * ($xs * ($prbox[3] + $prbox[1]) / $scale + $pbox[2])); $slist[0] = cm(@scalestr, # 0.5 * (-$ys * $scale * $prbox[0] + $pbox[1] + $pbox[3])); .25 * (3 * ($pbox[1] + $pbox[3]) - $ys * $scale * ($prbox[2] + $prbox[0])) + $opt_u*0.5, @clipbox); $slist[1] = cm(@scalestr, # -0.5 * $ys * $scale * $prbox[0] + $pbox[1]); .25 * (3 * $pbox[1] + $pbox[3] - $ys * $scale * ($prbox[2] + $prbox[0])) - $opt_u*0.5, @clipbox); } elsif ($opt_p == 2) # double page landscape on portrait { @scalestr = ($xs, 0, 0, 0.5 * $ys, .5 * ($pbox[2] - $xs * ($prbox[2] - $prbox[0]))); # -$xs * $prbox[0] + $pbox[0]); # $slist[0] = cm(@scalestr, 0.5 * (-$ys * $prbox[1] + $pbox[1] + $pbox[3]), @clipbox); # $slist[1] = cm(@scalestr, -0.5 * $ys * $prbox[1] + $pbox[1], @clipbox); $slist[0] = cm(@scalestr, .25 * (3 * $pbox[3] + $pbox[1] - $ys * ($prbox[3] - $prbox[1])) - $prbox[1], @clipbox); $slist[1] = cm(@scalestr, .25 * (3 * $pbox[1] + $pbox[3] - $ys * ($prbox[3] - $prbox[1])) - $prbox[1], @clipbox); } elsif ($opt_p == 4 && $is == 1) # true portrait { $a = .5 * $xs; $b = .5 * $ys; $slist[0] = cm($a, 0, 0, $b, -$a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), -$b * $prbox[1] + $pbox[1], @clipbox); $slist[2] = cm(-$a, 0, 0, -$b, $a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), $b * $prbox[1] + $pbox[3], @clipbox); if ($opt_p =~ /s/o) { $slist[1] = cm($a, 0, 0, $b, -$a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), -$b * $prbox[1] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox); $slist[3] = cm(-$a, 0, 0, -$b, $a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), $b * $prbox[1] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox); } else { $slist[1] = cm(-$a, 0, 0, -$b, $a * $prbox[0] + $pbox[2], $b * $prbox[1] + $pbox[3], @clipbox); $slist[3] = cm($a, 0, 0, $b, -$a * $prbox[0] + $pbox[0], -$b * $prbox[1] + $pbox[1], @clipbox); } } elsif ($opt_p == 4) { $a = .5 * $ys * $scale; $b = .5 * $xs / $scale; $slist[0] = cm(0, -$a, $b, 0, -$b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), $a * $prbox[2] + $pbox[1], @clipbox); $slist[2] = cm(0, $a, -$b, 0, $b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), -$a * $prbox[2] + $pbox[3], @clipbox); if ($opt_p =~ /s/o) { $slist[1] = cm(0, -$a, $b, 0, -$b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), $a * $prbox[2] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox); $slist[3] = cm(0, $a, -$b, 0, $b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), -$a * $prbox[2] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox); } else { $slist[1] = cm(0, $a, -$b, 0, $b * $prbox[1] + $pbox[2], -$a * $prbox[2] + $pbox[3], @clipbox); $slist[3] = cm(0, -$a, $b, 0, -$b * $prbox[1] + $pbox[0], $a * $prbox[2] + $pbox[1], @clipbox); } } $scache{$id} = [@slist]; } @s = $pr[$j]->{'Contents'}->elementsof if (defined $pr[$j]->{'Contents'}); if (!defined $s) { $min = 100000; for ($k = 0; $k <= $#pr; $k++) { next unless defined $pr[$k]; if ($pr[$k]->{' pnum'} < $min) { $bp = $pr[$k]; $min = $pr[$k]->{' pnum'}; } } $s = PDFArray(); $bp->{' Contents'} = $p->new_obj($s); $bp->{'Rotate'} = PDFNum(90) if ($opt_r); } next unless defined $pr[$j]; $s->add_elements($fpgins) unless $j == $#pr; $s->add_elements($scache{$id}[$j]) unless ($opt_h == 17 && $j == $#pr); $s->add_elements(@s); $s->add_elements($spgins) unless $j == $#pr; if ($pr[$j] ne $bp) { $pr2 = $pr[$j]; $s1 = $bp->find_prop('Resources'); $s2 = $pr2->find_prop('Resources'); $bp->{'Resources'} = merge_dict($s1, $s2) unless ($s1 eq $s2); $p->free_obj($pr2); while (defined $pr2->{'Parent'}) { $temp = $pr2->{'Parent'}; $temp->{'Kids'}->removeobj($pr2) if ($pr2->{'Type'}{'val'} eq 'Page' || $pr2->{'Kids'}->elementsof <= 0); $temp->{'Count'}{'val'}--; $pr2 = $temp; if ($pr2->{'Kids'}->elementsof <= 0) { print "Killing a tree! $pr2->{'num'}\n" unless $opt_q; $p->free_obj($pr2); } else { $p->out_obj($pr2); } } } } return 1 unless defined $bp; $bp->{'Contents'} = delete $bp->{' Contents'}; foreach (qw(Annots Thumb Beeds CropBox)) { delete $bp->{$_} if defined $bp->{$_}; } $bp->bbox(@pbox); $p->out_obj($bp); } sub merge_dict { my ($p1, $p2) = @_; return $p1 if (ref $p1 eq "Text::PDF::Objind" && ref $p2 eq "Text::PDF::Objind" && $p->{' objects'}{$p1->uid}[0] eq $p->{' objects'}{$p2->uid}[0]); # $p1 = $p->read_obj($p1) if (ref $p1 eq "Text::PDF::Objind"); # $p2 = $p->read_obj($p2) if (ref $p2 eq "Text::PDF::Objind"); $p1->realise; $p2->realise; my ($k, $v1, $v2); my (@a1, @a2, %a1); if ($p1->{' isvisited'}) { warn "circular reference!"; return $p1; } $p1->{' isvisited'} = 1; $p2->{' isvisited'} = 1; foreach $k (keys %{$p1}) { next if ($k =~ m/^\s/oi); $v1 = $p1->{$k}; $v2 = $p2->{$k}; next if $v1 eq $v2 || !defined $v2; # !defined added v1.201 if (ref $v1 eq "Text::PDF::Objind" || ref $v2 eq "Text::PDF::Objind") { $v1->realise if (ref $v1 eq "Text::PDF::Objind"); $v2->realise if (ref $v2 eq "Text::PDF::Objind"); } next unless defined $v2; # assume $v1 & $v2 are of the same type if (ref $v1 eq "Text::PDF::Array") { # merge contents of array uniquely (the array is a set) @a1 = $v1->elementsof; @a2 = $v2->elementsof; map { $a1{$_} = 1; } @a1; push (@a1, grep (!$a1{$_}, @a2)); $p1->{$k} = PDFArray(@a1); } elsif (ref $v1 eq "Text::PDF::Dict") # { $p1->{$k} = merge_dict($v1->val, $v2->val); } { $p1->{$k} = merge_dict($v1, $v2); } elsif ($v1->val ne $v2->val) { warn "Inconsistent dictionaries at $k with " . $v1->val . " and " . $v2->val; $p1->{$k} = $v1; } } foreach $k (grep (!defined $p1->{$_} && $_ !~ m/^\s/oi, keys %{$p2})) { $p1->{$k} = $p2->{$k}; } $p->out_obj($p1) if $p1->is_obj($p); delete $p1->{' isvisited'}; delete $p2->{' isvisited'}; return $p1; } sub cm { my (@a) = @_; my ($res, $r, $str); foreach $r (@a) { $r = int($r) if (abs($r - int($r)) < 1e-6); } $str = "$a[6] $a[7] $a[8] $a[9] re W n" if (defined $a[6]); return undef if ($a[0] == 1 && $a[1] == 0 && $a[2] == 0 && $a[3] == 1 && $a[4] == 0 && $a[5] == 0 && $str eq ''); $res = PDFDict(); $p->new_obj($res); $res->{' stream'} = "$a[0] $a[1] $a[2] $a[3] $a[4] $a[5] cm $str"; $res; } sub copy_page { my ($page) = @_; return undef unless $page; my ($res) = $page->copy; $p->new_obj($res); $res; }