Keith Vetter 2018-08-31 - A script that uses the
ao3 package to download fan-fiction stories from Archive of Our Own and turn them into epubs.
See also:
- EpubCreator -- tool to create an epub from html pages
- ao3 -- interface to get data about Archive of Our Own fan-fiction stories
##+##########################################################################
#
# ao3ToEpub -- uses the AO3 library to create an epub for an Archive of Our Own story
# Requires ao3.tsh and epubCreator.tsh; both can be found on http://wiki.tcl.tk
#
# by Keith Vetter 2018-08-29
#
package require fileutil
if {! [file exist ao3.tsh]} {
error "missing ao3.tsh--download from http://wiki.tcl.tk/ao3"
}
source ao3.tsh
proc IntoEpub {id} {
    # Builds an epub for one Archive of Our Own story.
    #
    # id - story identifier, handed unchanged to ::AO3::New
    #
    # Steps: create a scratch directory under the system temp dir, save the
    # raw story html there, write every chapter to its own html file, run
    # epubCreator on those files, then remove the scratch directory.
    # The epub is written to ~/FBooks/<title>.epub, where <title> is the
    # story title with spaces turned into underscores and every remaining
    # non-word character dropped.
    #
    # Raises an error when epubCreator cannot be located or when running it
    # fails; in that case the scratch directory is left behind.

    puts "Building epub for story $id from Archive of Our Own"

    # Start from an empty scratch directory, even if an earlier run left one.
    set tmpdir [file join [::fileutil::tempdir] "ao3_epubCreator_$id"]
    file delete -force $tmpdir
    file mkdir $tmpdir

    # NOTE(review): the meaning of the "0 cache" arguments is defined by the
    # ao3 package and is not visible here -- confirm against ao3.tsh.
    set ao3 [::AO3::New $id 0 cache]
    puts "[$ao3 url]\n"
    $ao3 save [file join $tmpdir raw.html]
    set title [$ao3 title]
    set author [$ao3 author]
    set files [ChaptersIntoFiles $ao3 $tmpdir]
    $ao3 cleanup

    set output [file join ~/FBooks [regsub -all {\W} [string map {" " _} $title] ""].epub]

    # Assemble the command as a real Tcl list. Gluing the words together as
    # a string with hand-written quotes and re-parsing it with {*} breaks as
    # soon as a title, author or path contains a double quote, a brace or a
    # backslash; lappend quotes every word correctly.
    set cmd [findEpubCreator]
    lappend cmd -verbose 0 -output $output -title $title -author $author -data
    foreach fname $files {
        lappend cmd $fname
    }

    puts "\nrunning: $cmd"
    puts [exec {*}$cmd]

    # Safety net: only remove the directory if it really is inside the
    # system temp dir.
    if {[string first [::fileutil::tempdir] $tmpdir] > -1} {
        file delete -force $tmpdir
    }
}
proc findEpubCreator {} {
    # Locates the epubCreator.tsh script.
    #
    # Asks auto_execok for epubCreator.tsh under its bare name first, then
    # as ./epubCreator.tsh and ../epubCreator.tsh.
    #
    # Returns the first non-empty auto_execok result.
    # Raises an error when none of the three candidates resolves.

    set script epubCreator.tsh
    foreach where [list "" . ..] {
        set found [auto_execok [file join $where $script]]
        if {$found eq ""} {
            continue
        }
        return $found
    }
    error "missing epubCreator.tsh--download from http://wiki.tcl.tk/epubCreator"
}
proc ChaptersIntoFiles {ao3 tmpdir} {
    # Writes every chapter of a story to its own html file.
    #
    # ao3    - story command; must answer "chapter . count" and
    #          "chapter N html"
    # tmpdir - existing directory that receives the files
    #
    # Each chapter's html is passed through TweakHtml and stored as
    # chapter_N.html inside tmpdir, with N starting at 1.
    #
    # Returns the list of file names written, in chapter order.

    set total [$ao3 chapter . count]
    # A reported count of 0 is handled as one chapter (presumably a story
    # without chapter divisions -- confirm against the ao3 package).
    if {$total == 0} {
        set total 1
    }

    set written [list]
    set n 0
    while {[incr n] <= $total} {
        set page [TweakHtml [$ao3 chapter $n html]]
        set path [file join $tmpdir "chapter_${n}.html"]
        puts "chapter $n -> $path"
        lappend written $path

        set chan [open $path "w"]
        puts -nonewline $chan $page
        close $chan
    }
    return $written
}
proc TweakHtml {html} {
    # Fix html to be epub3 legal and nicer looking.
    #
    # html - chapter markup
    #
    # Returns the markup with all anchor tags stripped (the text between
    # them is kept) and the "Chapter Text" h3 heading removed.

    # Remove <a ...> and </a> tags. The tag name has to be followed by
    # whitespace, "/" or ">" so that other elements whose names merely start
    # with "a" (abbr, address, article, aside, audio, ...) are left intact.
    regsub -all {</?a(?:[\s/][^>]*)?>} $html "" html

    # remove <h3>Chapter Text</h3>
    regsub -all {<h3[^>]*>Chapter Text</h3>} $html "" html

    return $html
}
# Script entry point: the first command-line argument is the story id.
# Without any argument a usage line is printed and nothing else happens.
if {$argv ne {}} {
    set id [lindex $argv 0]
    IntoEpub $id
    return
}
puts "usage: [file tail $argv0] story_id"
return