EMJ An email is a tree of parts. Feed a file containing a raw email to the script below, it will build a tree, then use it to save all the parts separately as well as giving you the tree structure.
I used this to learn about Tcllib MIME, struct::tree, and sha1.
The tree you get is a file that looks like this:
leaf 69a2faa59d {params {format flowed charset ISO-8859-1} content text/plain encoding 7bit}
leaf 3210ba9e34 {params {charset ISO-8859-1} content text/html encoding 7bit}
wrapper d0a17db3c7 {content multipart/alternative} 69a2faa59d 3210ba9e34
leaf ce9a16e7d3 {params {name {Globe.pdf}} content application/pdf encoding base64}
wrapper 12102a8c68 {content multipart/mixed} d0a17db3c7 ce9a16e7d3
This is the tree of parts, with the root at the bottom. Actually the structure of this example is:
+--leaf 69a2faa59d
+--leaf 3210ba9e34
+--wrapper d0a17db3c7
+--leaf ce9a16e7d3
wrapper 12102a8c68
The root is always a "wrapper", with the main headers saved to a file. Other "wrappers" also have headers saved to a file (which may be empty). A "leaf" has headers (saved to a file) and a body (decoded and saved to a file). The name of a "leaf" (and its two files) is the 40-character (shortened to 10 above) of the data. The name of a wrapper and its header file (and the tree file for the root wrapper) is the hash of the list of hashes of its immediate children.
This is 7 or 8 years old, and I'm afraid there aren't many comments.
#!/bin/sh
# the next line restarts using tclsh \
exec tclsh "$0" ${1+"$@"}
package require mime
package require sha1
package require struct::tree
proc buildmimetree {token node} {
set props [mime::getproperty $token -names]
if {[lsearch $props parts] >= 0} {
# this one has parts
set partlist [mime::getproperty $token parts]
foreach pt $partlist {
# create a tree node for the part
stree insert $node end $pt
# process the part in its turn
buildmimetree $pt $pt
}
}
}
proc mimeleaf {node} {
set sha1tok [sha1::SHA1Init]
set df [open $node.data w]
# must be in binary mode
fconfigure $df -translation binary
set hash [mime::getbody $node -command [list bodypart $df $sha1tok]]
close $df
if [file exists $hash.data] {
file delete $hash.data
}
file rename $node.data $hash.data
stree set $node hash $hash
savemimedata $hash $node
}
proc mimewrapper {node} {
set childlist [list]
foreach child [stree children $node] {
lappend childlist [stree get $child hash]
}
set hash [sha1::sha1 $childlist]
stree set $node hash $hash
savemimedata $hash $node
}
proc savemimedata {hash node} {
# write out headers
set hf [open $hash.hdr w]
fconfigure $hf -translation crlf
writeheaders $hf $node
close $hf
}
proc fixprops {node {type l}} {
array set props [mime::getproperty $node]
array unset props parts
array unset props size
if {[string length $props(encoding)] == 0} {
array unset props encoding
}
if {[string equal $type w]} {
array unset props encoding
}
array set pars $props(params)
array unset pars boundary
if {[llength [array names pars]] > 0} {
set props(params) [array get pars]
} else {
array unset props params
}
return [list [array get props]]
}
proc writeheaders {channel node} {
foreach header [::mime::getheader $node -names] {
if {[string first "From " $header] == 0} {
continue
}
# puts $channel "$header: [join [mime::getheader $node $header]]"
# Keep repeats separate (good for Received:)
foreach occ [mime::getheader $node $header] {
puts $channel "$header: $occ"
}
}
}
proc bodypart {channel sha1token type {data ""}} {
switch -exact -- $type {
data {
puts -nonewline $channel $data
sha1::SHA1Update $sha1token $data
}
end {
return [sha1::Hex [sha1::SHA1Final $sha1token]]
}
}
}
# -------- start of mainline code
if {[llength $argv] != 1} {
puts "Usage : mailbreak filename"
exit 1
}
set fname [lindex $argv 0]
if {![file readable $fname]} {
puts "Can't open $fname"
exit 1
}
# tokenize our file
set token [mime::initialize -file $fname]
# create a tree to use
struct::tree stree
# and rename the root after the main token
stree rename root $token
buildmimetree $token $token
# walk the tree bottom-up collecting the pieces
set toplevel [stree rootname]
stree walk $toplevel -order post node {
if [stree isleaf $node] {
mimeleaf $node
} else {
mimewrapper $node
}
}
# write something that could be a script to recreate the whole thing
set fname [stree get $toplevel hash]
set tf [open $fname.tree w]
stree walk $toplevel -order post node {
if [stree isleaf $node] {
puts $tf "leaf [stree get $node hash] [fixprops $node l]"
} else {
set childlist [list]
foreach child [stree children $node] {
lappend childlist [stree get $child hash]
}
puts $tf "wrapper [stree get $node hash] [fixprops $node w] $childlist"
}
}
close $tf
exit 0