Updated 2016-03-28 19:16:02 by pooryorick

images2pdf is a command-line utility that uses pdf4tcl to create a PDF document from images and other PDF documents, without modifying the image data.

Synopsis  edit

images2pdf ?option value ...? ?option {*}values?

Options  edit

outfile
The name of the output file. By default, this is derived from outprefix.
height
The maximum height of any page. The default is -1, which means the height of each image is used.
width
The maximum width of any page. The default is 800.
infile
The name of a file to include. Supported file types are pdf, jpeg, or png.
outprefix
A prefix to use for any files produced. If not provided, one is chosen.
infiles
Must be the last option provided, as it signifies that the remaining arguments are image filenames.

Description  edit

PYK 2016-03-10: To verify that the included images have not been modified, extract them again with a utility like pdfimages -all that just extracts images without processing them. The extracted images should be identical, bit-for-bit, to the original images.

Each image is output as a separate page, and the width and height of the page are the width and height of the image. To constrain one or the other, use the height or width options. The image width/height ratio is always preserved. The constraint only affects how the image is dynamically resized for presentation, not how the image is stored in the PDF file -- which for png and jpg files is always as a bit-for-bit identical copy of the original image. Currently, only jpeg and png files are supported.

Some PDF readers don't provide controls to zoom to a size smaller than the dimensions of the page that contains the image, but they do provide controls to magnify the image, so a default value of 800 is a good choice to ensure that the initial image fits into a reasonable display width, while still making it possible to zoom in for greater detail.

Utilities that might be used in conjunction with this script include tiff2pdf and jpegtran for lossless transformations of jpeg images.

Implementation  edit

#! /bin/env tclsh

package require fileutil::magic::filetype

package require pdf4tcl

# main --
#
#   Parse the command-line options, wrap every input image in its own
#   single-page PDF whose page size is the (possibly scaled-down) image
#   size, and concatenate those pages with any input PDF documents.
#
# Arguments:
#   argv0   Name of the script. Not used by this procedure.
#   argv    Flat list of option/value pairs. Recognised options are
#           outfile, height, width, infile, outprefix and infiles; the
#           infiles option consumes all remaining arguments as file names.
#
# Results:
#   Writes one file named <outprefix>-<n>.pdf per input image. When there
#   is more than one input, the inputs are concatenated into outfile (or
#   into the next free <outprefix>-<n>.pdf name) and the per-image files
#   are deleted. When there is exactly one input and outfile is given, the
#   single result is stored under that name.
#
#   Raises an error for an unknown option, an option without a value, a
#   non-numeric height or width, an empty input list, an unrecognised file
#   type, or an output file that already exists.
proc main {argv0 argv} {
    set dims {}
    set ftypes {}
    set infiles {}
    set maxheight -1
    set maxwidth 800
    while {[llength $argv]} {
        # Remember whether a value followed the key: after lassign a missing
        # value is indistinguishable from an explicit empty string.
        set hasval [expr {[llength $argv] > 1}]
        set argv [lassign $argv[set argv {}] key val]
        if {!$hasval
            && $key in {outfile height infile infiles outprefix width}} {
            return -code error [list {missing value for option} $key]
        }
        switch -- $key {
            outfile {
                set outfile $val
            }
            height {
                if {![string is double -strict $val]} {
                    return -code error [list {invalid value for option} $key $val]
                }
                set maxheight $val
            }
            infile {
                lappend infiles $val
            }
            infiles {
                # Everything that is left on the command line is a file name.
                lappend infiles $val {*}$argv[set argv {}]
            }
            outprefix {
                set outprefix $val
            }
            width {
                if {![string is double -strict $val]} {
                    return -code error [list {invalid value for option} $key $val]
                }
                # The value, not the literal word "val", is the limit.
                set maxwidth $val
            }
            default {
                return -code error [list {unknown option} $key]
            }
        }
    }
    if {![llength $infiles]} {
        return -code error [list {no input files}]
    }
    if {![info exists outprefix]} {
        # Choose the shortest run of zeros that no existing file name in the
        # current directory starts with.
        set outi 0
        while {[llength [glob -nocomplain [set outprefix [
            string repeat 0 [incr outi]]]*]]} {}
    }

    # First pass: classify every input and work out the page size of each
    # image. A throw-away document is used only to read image dimensions.
    pdf4tcl::new mypdf
    foreach infile $infiles {
        set desc [fileutil::magic::filetype $infile]
        switch -glob -- $desc {
            {JPEG *} {set ftype jpeg}
            {PNG *}  {set ftype png}
            {TIFF *} {set ftype tiff}
            {PDF *}  {set ftype pdf}
            default {
                return -code error [list {unknown file type} $desc]
            }
        }
        lappend ftypes $ftype
        switch -- $ftype {
            jpeg - png - tiff {
                set id [mypdf addImage $infile -type $ftype]
                set width [mypdf getImageWidth $id]
                set height [mypdf getImageHeight $id]
                puts stderr [list infile $infile type $ftype  height $height width $width]
                # Halve both dimensions until the page fits the limits; a
                # limit of -1 means that dimension is unconstrained.
                while {($maxwidth > -1 && $width > $maxwidth)
                    || ($maxheight > -1 && $height > $maxheight)} {

                    set height [expr {$height / 2}]
                    set width [expr {$width / 2}]
                }
                lappend dims [list $width $height]
            }
            default {
                # PDF inputs are passed through as they are; no page size.
                lappend dims {}
            }
        }
    }
    mypdf destroy

    # Second pass: write one single-page PDF per image. The infiles list is
    # rebuilt so that it names only PDF files, in the original order.
    set tmpfiles {}
    set idx -1
    foreach infile $infiles[set infiles {}] dim $dims ftype $ftypes {
        switch -- $ftype {
            jpeg - png - tiff {
                pdf4tcl::new mypdf -paper $dim
                set id [mypdf addImage $infile -type $ftype]
                mypdf putImage $id 0 0 -width [lindex $dim 0] -height [lindex $dim 1]
                # Skip names that are already taken so that no existing
                # file is overwritten when the caller supplied outprefix.
                while {[file exists [set fname $outprefix-[incr idx].pdf]]} {}
                mypdf write -file $fname
                lappend tmpfiles $fname
                lappend infiles $fname
                mypdf destroy
            }
            pdf {
                lappend infiles $infile
            }
            default {
                return -code error [list {unknown file type} $ftype {for file}  $infile]
            }
        }
    }
    if {[llength $infiles] > 1} {
        if {![info exists outfile]} {
            while {[file exists [set outfile $outprefix-[incr idx].pdf]]} {}
        }
        if {[file exists $outfile]} {
            return -code error [list {file already exists} $outfile]
        }
        pdf4tcl::catPdf {*}$infiles $outfile
        file delete -- {*}$tmpfiles
    } elseif {[info exists outfile]} {
        if {[file exists $outfile]} {
            return -code error [list {file already exists} $outfile]
        }
        set only [lindex $infiles 0]
        if {$only in $tmpfiles} {
            # A page file generated above: just give it its final name.
            file rename -- $only $outfile
        } else {
            # The caller's own PDF: copy it rather than moving the original.
            file copy -- $only $outfile
        }
    }
}

# Script entry point: run main with the script name and the command-line arguments.
main $argv0 $argv