images2pdf is a command-line utility that uses
pdf4tcl to create a
pdf document from images and other pdf documents, without modifying the image data.
Synopsis edit
-
- images2pdf ?option value ...? ?option {*}values?
- outfile
- The name of the output file. By default, this is derived from outprefix
- height
- The maximum height of any page. The default is -1, which means the height of each image is used.
- width
- The maximum width of any page. The default is 800.
- infile
- The name of a file to include. Supported file types are pdf, jpeg, or png.
- outprefix
- A prefix to use for any files produced. If not provided, one is chosen.
- infiles
- Must be the last option provided, as it signifies that the remaining arguments are image filenames.
Description edit
PYK 2016-03-10: To verify that the included images have not been modified, extract them again with a utility like
pdfimages -all that just extracts images without processing them. The extracted images should be identical, bit-for-bit, to the original images.
Each image is output as a separate page, and the width and height of the page are the width and height of the image. To constrain one or the other, use the
height or
width options. The image width/height ratio is always preserved. The constraint only reflects how the image is dynamically resized for presentation, not how the image is stored in the PDF file -- which for png and jpg files is always as a bit-for-bit identical copy of the original image. Currently, only
jpeg and
png files are supported.
Some PDF readers don't provide controls to zoom to a size smaller than the dimensions of the page that contain it, but they do provide controls to magnify the image, so a default value of
800 is a good choice to ensure that the initial image fits into a reasonable display width, while still making it possible to zoom in for greater detail.
Utilities that might be used in conjunction with this script include
tiff2pdf and
jpegtran for lossless transformations of jpeg images.
Implementation edit
#! /bin/env tclsh
package require fileutil::magic::filetype
package require pdf4tcl
# main --
#
#   Build a PDF document from image files and/or existing PDF documents.
#
# Arguments:
#   argv0   Name of the running script (not used by the procedure).
#   argv    Flat list of "option value" pairs.  Recognised options are
#           outfile, height, width, infile, outprefix and infiles.  The
#           "infiles" option takes its value and every remaining argument
#           as input file names, so it must come last.
#
# Results:
#   Each image is written to its own one-page PDF named
#   <outprefix>-<n>.pdf.  When there is more than one part, the parts are
#   concatenated into the output file and the per-image PDFs are deleted.
#   When there is exactly one part and "outfile" was given, that part
#   becomes the output file.  Raises an error for an unknown option, an
#   option without a value, an empty input list, an unrecognised file type
#   or an output file that already exists.
proc main {argv0 argv} {
    set dims {}
    set ftypes {}
    set infiles {}
    set maxheight -1
    set maxwidth 800

    # --- command line -----------------------------------------------------
    while {[llength $argv]} {
        if {[llength $argv] < 2} {
            return -code error [list {missing value for option} [lindex $argv 0]]
        }
        # The "$argv[set argv {}]" idiom hands the only reference to lassign
        # so the list is not copied.
        set argv [lassign $argv[set argv {}] key val]
        switch -exact -- $key {
            outfile {
                set outfile $val
            }
            height {
                set maxheight $val
            }
            infile {
                lappend infiles $val
            }
            infiles {
                # everything that is left is an input file name
                lappend infiles $val {*}$argv[set argv {}]
            }
            outprefix {
                set outprefix $val
            }
            width {
                set maxwidth $val
            }
            default {
                return -code error [list {unknown option} $key]
            }
        }
    }

    if {![llength $infiles]} {
        return -code error {no input files}
    }
    # Refuse to clobber an existing output file before any work is done, so
    # that no intermediate files are left behind on this error.
    if {[info exists outfile] && [file exists $outfile]} {
        return -code error [list {file already exists} $outfile]
    }
    if {![info exists outprefix]} {
        # Try 0, 00, 000, ... until no existing file starts with the prefix.
        set outi 0
        while {[llength [glob -nocomplain [set outprefix [
            string repeat 0 [incr outi]]]*]]} {}
    }

    # --- pass 1: classify the inputs and compute page sizes ---------------
    pdf4tcl::new mypdf
    foreach infile $infiles {
        # The description is matched on its leading format name.
        set ftype [fileutil::magic::filetype $infile]
        if {[string match {JPEG *} $ftype]} {
            set ftype jpeg
        } elseif {[string match {PNG *} $ftype]} {
            set ftype png
        } elseif {[string match {TIFF *} $ftype]} {
            set ftype tiff
        } elseif {[string match {PDF *} $ftype]} {
            set ftype pdf
        } else {
            return -code error [list {unknown file type} $ftype]
        }
        lappend ftypes $ftype
        switch -exact -- $ftype {
            jpeg - png - tiff {
                # The image is added to a scratch document only to learn
                # its dimensions.
                set id [mypdf addImage $infile -type $ftype]
                set width [mypdf getImageWidth $id]
                set height [mypdf getImageHeight $id]
                puts stderr [list infile $infile type $ftype height $height width $width]
                # Halve both sides together until the page fits the limits;
                # a limit of -1 means "no limit".
                while {($maxwidth > -1 && $width > $maxwidth)
                    || ($maxheight > -1 && $height > $maxheight)} {
                    set height [expr {$height / 2}]
                    set width [expr {$width / 2}]
                }
                lappend dims [list $width $height]
            }
            default {
                # PDF inputs need no page size; keep the lists aligned.
                lappend dims {}
            }
        }
    }
    mypdf destroy

    # --- pass 2: one single-page PDF per image ----------------------------
    set parts {}
    set tmpfiles {}
    set idx -1
    foreach infile $infiles dim $dims ftype $ftypes {
        switch -exact -- $ftype {
            jpeg - png - tiff {
                pdf4tcl::new mypdf -paper $dim
                set id [mypdf addImage $infile -type $ftype]
                mypdf putImage $id 0 0 -width [lindex $dim 0] -height [lindex $dim 1]
                # Skip names that are already taken so that no existing
                # file is overwritten.
                while {[file exists [set fname $outprefix-[incr idx].pdf]]} {}
                mypdf write -file $fname
                mypdf destroy
                lappend tmpfiles $fname
                lappend parts $fname
            }
            pdf {
                lappend parts $infile
            }
            default {
                return -code error [list {unknown file type} $ftype {for file} $infile]
            }
        }
    }

    # --- assemble the result ----------------------------------------------
    if {[llength $parts] > 1} {
        if {![info exists outfile]} {
            # pick the next free name under the prefix
            while {[file exists [set outfile $outprefix-[incr idx].pdf]]} {}
        }
        pdf4tcl::catPdf {*}$parts $outfile
        if {[llength $tmpfiles]} {
            file delete -- {*}$tmpfiles
        }
    } elseif {[info exists outfile]} {
        set only [lindex $parts 0]
        if {$only in $tmpfiles} {
            # a page generated here may simply be moved into place
            file rename -- $only $outfile
        } else {
            # the single part is the caller's own PDF: copy it, never move it
            file copy -- $only $outfile
        }
    }
}
# Script entry point: run main with the script name and the command-line
# arguments that tclsh stores in the global variables argv0 and argv.
main $argv0 $argv