Keith Vetter 2014-03-14 : Along with the recently posted
epubCreator, here's another tool for working with
epubs. This one opens up an epub and extracts all the metadata associated with it.
If you run it using tclsh, it will print the results to the terminal. If you run it using wish or specify "--tk" it will display the results in a window along with the book's cover image.
##+##########################################################################
#
# epubDump.tsh -- dumps out metadata in an epub file
# by Keith Vetter 2014-03-14
#
package require Tcl 8.6
package require vfs::zip
package require tdom
set epubMount /__epub
set opf_namespaces {ns "http://www.idpf.org/2007/opf"
xsi "http://www.w3.org/2001/XMLSchema-instance"
opf "http://www.idpf.org/2007/opf"
dcterms "http://purl.org/dc/terms/"
calibre "http://calibre.kovidgoyal.net/2009/metadata"
dc "http://purl.org/dc/elements/1.1/"}
set meta_namespaces {mm urn:oasis:names:tc:opendocument:xmlns:container}
proc DumpEpub {epubFile} {
set mnt [::vfs::zip::Mount [file normalize $epubFile] $::epubMount]
try {
DumpEpub2 $epubFile
} finally {
::vfs::zip::Unmount $mnt $::epubMount
}
}
proc DumpEpub2 {epubFile} {
set opf_file [FindOPF]
set opf_dir [file dirname $opf_file]
set opf [ReadAllData [file join $::epubMount $opf_file]] ; list
dom parse $opf doc
$doc selectNodesNamespaces $::opf_namespaces
set root [$doc documentElement]
set keys [list epub title author "epub version"]
set values {}
lappend values $epubFile
lappend values [[[$root selectNodes //dc:title] firstChild] nodeValue]
set authors {}
foreach node [$root selectNodes //dc:creator] {
lappend authors [[$node firstChild] nodeValue]
}
lappend values [join $authors ", "]
lappend values [$root getAttribute version]
for {set metaNode [[$root selectNodes //ns:metadata] firstChild]} \
{$metaNode ne ""} \
{set metaNode [$metaNode nextSibling]} {
set nodeName [$metaNode nodeName]
if {$nodeName in {dc:title dc:creator}} continue
if {$nodeName eq "meta"} {
if {[$metaNode hasAttribute name]} {
set name [$metaNode getAttribute name]
set content [$metaNode getAttribute content]
lappend keys "meta/$name"
lappend values $content
}
} elseif {[string match "dc:*" $nodeName]} {
set nodeName [string range $nodeName 3 end]
lappend keys $nodeName
set value [[$metaNode firstChild] nodeValue]
if {$nodeName eq "identifier" && [$metaNode hasAttribute opf:scheme]} {
set scheme [$metaNode getAttribute opf:scheme]
lappend values "$scheme $value"
} else {
lappend values $value
}
}
}
# Cover image
lappend keys "cover image"
set value ""
set coverName ""
while {1} {
# Epub 3.0 way of specifying the cover image
set coverNode [$root selectNodes {//ns:item[@properties="cover-image"]}]
if {$coverNode ne ""} {
set href [$coverNode getAttribute href]
set coverName [file join $opf_dir $href]
set value $coverName
break
}
# Epub 2.0 way of specifying the cover image
set coverNode [$root selectNodes {//ns:meta[@name="cover"]}]
if {$coverNode ne ""} {
set coverId [$coverNode getAttribute content]
set coverItemNode [$root selectNodes //ns:item\[@id=\"$coverId\"\]]
set href [$coverItemNode getAttribute href]
set coverName [file join $opf_dir $href]
set value $coverName
break
}
# non-standard ways of specifying cover image
set guessCover [$root selectNodes {
//ns:item[translate(@href,"MSRCOVE","msrcove")="msrcover.jpg"
and starts-with(@media-type,"image/")]}]
if {$guessCover ne ""} {
set href [$guessCover getAttribute href]
set coverName [file join $opf_dir $href]
set value "? $coverName ?"
break
}
set guess [$root selectNodes {//ns:item[contains(translate(@href,"COVER","cover"),"cover")
and starts-with(@media-type,"image/")]}]
if {$guess eq ""} {
set guess [$root selectNodes {//ns:item[contains(@href,"_msr_cvi_r")
and starts-with(@media-type,"image/")]}]
}
if {[llength $guess] > 0} {
set href [[lindex $guess 0] getAttribute href]
set coverName [file join $opf_dir $href]
set value "?? $coverName ??"
break
}
# No luck finding cover image
break
}
lappend values $value
# Spine items
lappend keys "spine items"
lappend values [llength [[$root selectNodes //ns:spine] childNodes]]
unset doc
ShowResult $keys $values $coverName
}
proc ShowResult {keys values coverName} {
if {[info exists ::tk_version]} {
set results [PrettyPrint $keys $values]
ShowCover [file tail [lindex $values 0]] $coverName $results
} else {
puts [PrettyPrint $keys $values]
puts ""
}
}
proc PrettyPrint {keys values} {
set max 0
foreach key $keys { set max [expr {max($max,[string length $key])}] }
set max [expr {min($max,15)}]
set maxValue [expr {55-$max}]
set result "[file tail [lindex $values 0]]\n\n"
foreach key $keys value $values {
if {[string length $value] > $maxValue} { set value "[string range $value 0 $maxValue-3]..." }
append result [format "%-${max}s : %s\n" $key $value]
}
return $result
}
proc ReadAllData {fname} {
set fin [open $fname r]
set data [read $fin] ;list
close $fin
return $data
}
proc FindOPF {} {
set container [ReadAllData [file join $::epubMount META-INF container.xml]] ; list
dom parse $container doc
set root [$doc documentElement]
set opf_file [[$root selectNodes -namespace $::meta_namespaces //mm:rootfile] \
getAttribute full-path]
unset doc
return $opf_file
}
proc ShowCover {epubName coverName result} {
set x [expr {[lindex [concat [.c bbox all] x x -5 x] 2] + 5}]
set item [.c create text $x 0 -anchor nw -text [string trim $result] -font textFont]
lassign [.c bbox $item] x0 y0 x1 y1
incr y1 5
if {$coverName ne ""} {
set iname [image create photo -file [file join $::epubMount $coverName]]
if {[image width $iname] < ($x1-$x0)} {
set xmid [expr {($x0+$x1)/2}]
set item [.c create image $xmid $y1 -anchor n -image $iname]
} else {
set item [.c create image $x $y1 -anchor nw -image $iname]
}
.c create rect [.c bbox $item] -fill {} -outline black -width 1
}
lassign [.c bbox all] . . x1 y1
.c lower [.c create rect $x 0 $x1 $y1 -tag a -fill white -outline white]
# Must fit this entry
.c config -width [expr {max($x1-$x,[.c cget -width])}]
.c config -height [expr {max($y1,[.c cget -height])}]
.c config -scrollregion [.c bbox all]
}
proc DoDisplay {} {
if {! [info exists ::tk_version]} return
package require Img
wm deiconify .
wm title . epubDump
::ttk::scrollbar .sb_x -command [list .c xview] -orient horizontal
canvas .c -xscrollcommand [list .sb_x set] -highlightthickness 0 -bg red
pack .sb_x -side bottom -fill x
pack .c -side top -fill both -expand 1
bind .c <2> [bind Text <2>] ;# Enable dragging w/ <2>
bind .c <B2-Motion> [bind Text <B2-Motion>]
if {"textFont" ni [font names]} {
font create textFont -family Courier -size 8 -weight bold
}
}
set epubFile ~/Downloads/On_Basilisk_Station.epub
if {"--tk" in $argv} { package require Tk }
DoDisplay
if {$tcl_interactive} return
foreach epubFile $argv {
if {$epubFile eq "--tk"} continue
DumpEpub $epubFile
}
return