- create a new user in group 'users' to run it, because the user it runs as needs mail support. (There are other ways to handle this, this is the simplest.)
- edit the configuration options
- let er rip.
#!/bin/sh
# the next line restarts using -*-Tcl-*-sh \
exec tclsh "$0" ${1+"$@"}
# HTTP over SMTP tunnelling Daemon
#
# Version 1.0.0
#
# produces a tarball with a rewritten version of the
# target URL and all image files necessary to view the
# page as it would be seen in a browser.
#
# handles email saved into a special
# mailbox by parsing the Subject: line
# when of the form:
#
# user@host.domain!http://www.insecure.org/
#
# THE TRAILING SLASH IS REQUIRED WHEN NO .html
# PAGE IS SPECIFIED!!
#
# notice the "!" separator between the email and URL.
#
# optionally, a password can be used by combining it
# with the email using a '%':
#
# user@host.domain%password!http://www.insecure.org/
#
# which means this script does user authentication.
#
# Note that redirects and Permanent Moves and cgi will not
# necessarily be handled. We are working on it, however.
#
# this script processes results one per minute to avoid trouble.
#
# this script logs all actions verbosely.
#
# copyright 2003 Philip S. Ehrens <phil@slug.org>
# assume GPL for all copyright and license concerns
#
# don't touch these!
set ::request_rx {Subject:\s+(\S+@\S+)!(http://\S+)}
set ::img_rx {src=(\S+\.(jpg|gif|png))}
## CONFIGURATION SECTION ##
# must be a usable smtp mail server
set ::mailhost localhost
# From: user for outgoing mail
set ::hostuser foo@bar.edu
# working directory for this script, all files
# created by this script will be created there.
set ::topleveldir .httpOverSmtp
# authorized users of this service and their password
# (optional. delimited by % from their email as in these
# examples.)
set ::valid_users foo@bar.edu%f0o
lappend ::valid_users bar@bar.edu%b4r
# this mailbox will be truncated each time it is read,
# so it should not be a user's mailbox, but a special
# one for use by this script. you can use procmail or
# mutt hooks or whatever to get the messages into it.
#
# to test this daemon you can create this file with a
# single line with a properly formatted Subject: line
# in it as described above. then start the program and
# it will process the request.
set ::mailbox httptunnel
# set the variable ::TEST to 1 to cause script to operate
# in "one shot" test mode. all output will be preserved
# in ::topleveldir, and no mail will be sent. script will
# exit after creating tarball in ::topleveldir.
set ::TEST 1
# run as a daemon, or once per invocation?
set ::DAEMON_MODE 0
# name of the log file in the ::topleveldir
set ::logfile httpOverSmtp.log
## END OF CONFIGURABLES ##
namespace eval smtp {}
proc smtp::parsebox { } {
if { [ catch {
;## read the mailbox
set fid [ open $::mailbox r ]
set data [ read $fid [ file size $::mailbox ] ]
close $fid
set fid [ open $::mailbox w ]
close $fid
set data [ split $data \n ]
# find formatted subject lines and validate email address
foreach line $data {
if { [ regexp $::request_rx $line -> email url ] } {
if { [ authorisedUser $email ] } {
set email [ lindex [ split $email % ] 0 ]
set request [ list $email $url ]
if { [ lsearch $::requests $request ] == -1 } {
lappend ::requests $request
}
} else {
set msg "smtp::parsebox: unauthorised user, "
append msg "request rejected: '$email $url'"
log $msg
}
}
}
} err ] } {
catch { close $fid }
log "smtp::parsebox: $err"
}
}
proc smtp::send { mailhost from to subject text { attachments "" } } {
if { [ catch {
set seqpt {}
set seqpt "socket($mailhost,25):"
set sid [ socket $mailhost 25 ]
set seqpt {}
set from_rx {^\{?\"?([^\"]+)\"?\s+<([^>]+)>\}?$}
if { [ regexp $from_rx $from -> name address ] } {
set from "\"$name\" <$address>"
} else {
set from [ string trim $from >< ]
set from "<$from>"
}
fconfigure $sid -buffering line
fileevent $sid readable [ list smtp::handle gets $sid ]
smtp::handle puts $sid "HELO localhost"
set bare [ string trim [ lindex $from end ] >< ]
smtp::handle puts $sid "MAIL From:<$bare>"
smtp::handle puts $sid "RCPT To:<$to>"
smtp::handle puts $sid DATA
smtp::handle puts $sid "From: $from"
smtp::handle puts $sid "To: $to"
smtp::handle puts $sid "Subject: $subject"
set text [ smtp::multipart $text $attachments ]
foreach line [ split $text "\n" ] {
smtp::handle puts $sid $line
}
smtp::handle puts $sid ".\nQUIT"
::close $sid
} err ] } {
catch { ::close $sid }
log "smtp::send:$seqpt $err"
}
}
# manages the socket connected to the smtp server
proc smtp::handle { action sid { line "" } } {
if { [ catch {
fconfigure $sid -blocking off
$action $sid $line
fconfigure $sid -blocking on
} err ] } {
# nothing to be done, socket has gone
}
}
# turns tarball into base64
proc smtp::dump { attachment } {
if { [ catch {
set fid [ open $attachment r ]
fconfigure $fid -encoding binary
fconfigure $fid -translation binary
set data [ read $fid [ file size $attachment ] ]
close $fid
set data [ smtp::encode64 $data ]
} err ] } {
catch { close $fid }
return -code error "smtp::dump: $err"
}
return $data
}
# attaches base64 encoded tarball to outgoing email
proc smtp::multipart { text { attachments "" } } {
if { [ catch {
set attachments [ split $attachments , ]
set boundary HTTP-over-SMTP_attachment_HTTP-over-SMTP
if { [ string length $attachments ] } {
set msg "MIME-Version: 1.0\n"
append msg "Content-Type: multipart/mixed;\n"
append msg " boundary=\"$boundary\"\n\n"
append msg "--$boundary\n"
append msg "Content-Type: text/plain; charset=US-ASCII\n\n"
append msg "$text\n"
foreach attachment $attachments {
set attachment [ string trim $attachment ]
if { ! [ file readable $attachment ] } {
set err "attachment not found: '$attachment'"
return -code error $err
}
append msg "--$boundary\n"
append msg "Content-Type: application/octet-stream\n"
append msg "Content-Transfer-Encoding: base64\n"
append msg "Content-Disposition: attachment;\n"
append msg " filename=\"[ file tail $attachment ]\"\n\n"
append msg [ smtp::dump $attachment ]
}
append msg "\n\n--${boundary}--\n\n"
} else {
set msg "\n$text"
}
} err ] } {
return -code error "smtp::multipart: $err"
}
return $msg
}
# internal base64 encoding engine. really slow!!
proc smtp::encode64 { string } {
set i 0
foreach char [ list A B C D E F G H I J K L M N O P Q R S \
T U V W X Y Z a b c d e f g h i j k l m n o \
p q r s t u v w x y z 0 1 2 3 4 5 6 7 8 9 + / ] {
set tmp($char) $i
lappend b64 $char
incr i
}
set wrapchar "\n"
set maxlen 60
set result {}
set state 0
set length 0
binary scan $string c* X
foreach { x y z } $X {
if { $maxlen && $length >= $maxlen } {
append result $wrapchar
set length 0
}
append result [ lindex $b64 [ expr {($x >>2) & 0x3F} ] ]
if { $y != {} } {
append result \
[ lindex $b64 \
[ expr {(($x << 4) & 0x30) | (($y >> 4) & 0xF)} ] ]
if { $z != {} } {
append result \
[ lindex $b64 \
[ expr {(($y << 2) & 0x3C) | (($z >> 6) & 0x3)} ] ]
append result \
[ lindex $b64 [ expr {($z & 0x3F)} ] ]
} else {
set state 2
break
}
} else {
set state 1
break
}
incr length 4
}
if { $state == 1 } {
append result \
[ lindex $b64 [ expr {(($x << 4) & 0x30)} ] ]==
} elseif { $state == 2 } {
append result \
[ lindex $b64 [ expr {(($y << 2) & 0x3C)} ] ]=
}
return $result
}
proc handleRequest { args } {
set request [ lindex $::requests 0 ]
if { [ string length $request ] } {
set ::requests [ lrange $::requests 1 end ]
foreach [ list email url ] $request { break }
log "getting $url for $email"
set localname [ localName $url ]
getCurlUrl $url $localname
set tarball [ getImages $url [ file dirname $localname ] ]
after 5000 [ list emailResult $email $url $tarball ]
}
}
proc emailResult { email url tarball } {
set local [ localName $url ]
if { [ string length $tarball ] } {
set local $tarball
}
set size unknown
catch { set size [ file size $local ] }
set time [ clock seconds ]
log "$email $url httpOverSmtp $local ($size bytes)"
smtp::send $::mailhost $::hostuser $email $url httpOverSmtp $local
file delete -force $local
}
# generic logging function
proc log { args } {
set timestamp [ clock format [ clock seconds ] ]
if { [ catch {
set fid [ open $::logfile a+ 0644 ]
} err ] } {
set fid stderr
}
puts $fid "${timestamp}: ${::requests}: $args"
close $fid
}
# turns remote URL into a local filename
proc localName { url } {
regsub {(http|ftp):/+} $url {} localname
set localname [ string trimleft $localname / ]
if { [ regexp {\/$} $localname ] } {
set localname ${localname}index.html
}
return $localname
}
# wrapper for curl
proc getCurlUrl { url localname } {
if { [ catch {
file mkdir [ file dirname $localname ]
if { [ file exists $localname ] } {
return {}
}
;## -s : no progress meter
;## -N : no output buffer
;## -w %{size_download} : how many bytes were read?
set msg [ exec curl -s -N --connect-timeout 10 -m 60 -w %{size_download} $url -o $localname ]
if { [ regexp {^\d+$} $msg ] } {
set msg "$url $msg bytes"
}
log "getCurlUrl: $msg"
} err ] } {
if { [ string length $err ] } {
log "getCurlUrl: $err"
}
}
}
# iterator for retrieving images
proc getImages { url subdir } {
if { [ catch {
set data [ list ]
set tarball [ list ]
set localname [ localName $url ]
# if it was an html page we parse it and handle
# all the images.
if { [ regexp {html?$} $localname ] } {
set url [ split $url / ]
set url [ join [ lrange $url 0 end-1 ] / ]
set url ${url}/
set fid [ open $localname r ]
set data [ read $fid [ file size $localname ] ]
close $fid
set output [ list ]
foreach line [ split $data "\n" ] {
if { [ regexp -nocase $::img_rx $line -> image ] } {
set image [ string trim $image "'\"" ]
if { ! [ regexp {^(http|ftp):/+} $image ] } {
set image $url$image
}
set localimg [ localName $image ]
regsub "^$subdir\/" $localimg {} localimg
regsub $image $line $localimg line
set localimg $subdir/$localimg
getCurlUrl $image $localimg
}
append output "$line\n"
}
set fid [ open $localname w ]
puts $fid $output
close $fid
# otherwise we're done
} elseif { ! [ regexp {/$} $url ] } {
set url ${url}/
}
set tarball [ tarUp $subdir ]
} err ] } {
catch { close $fid }
log "getImages: $err"
}
return $tarball
}
proc tarUp { subdir } {
set tarball ${subdir}.tar.gz
exec tar czf $tarball $subdir
if { $::TEST == 1 } { exit }
file delete -force $subdir
return $tarball
}
# simpleminded user auth
proc authorisedUser { user } {
set auth 0
if { [ lsearch -exact $::valid_users $user ] > -1 } {
set auth 1
}
return $auth
}
# scheduler
proc runner { } {
smtp::parsebox
handleRequest
if { $::DAEMON_MODE == 0 } {
exit
}
after 60000 runner
}
# Tcl background error handler
proc bgerror { args } {
log "bgerror: $args"
}
## :TODO: make image retrieval optional.
## MAIN ##
file mkdir $::topleveldir
cd $::topleveldir
set ::requests [ list ]
log "START!"
runner
vwait foreverTV I remember a place where I worked (for hardly any pay) where email would work via Lotus Notes, and the 'firewall' would be passed by the equivalent of an old lorry, every, what, hour or so, so that the even the above approach would leave you to wait such amount of time to get your url message accross and your http reply back... Your provider/mailbox maintainer might also not like you for it, for instance for pages containing many large images, streaming is absent in this way, so you get all you called for. Kind of fun.

