



LV According to Slashdot

glennj this version is a translation of the algorithm as shown at wikipedia

namespace eval LZW { variable char2int variable chars for {set i 0} {$i < 256} {incr i} { set char [binary format c $i] set char2int($char) $i lappend chars $char } } proc LZW::encode {data} { variable char2int array set dict [array get char2int] set w "" set result [list] foreach c [split $data ""] { set wc $w$c if {[info exists dict($wc)]} { set w $wc } else { lappend result $dict($w) set dict($wc) [array size dict] set w $c } } lappend result $dict($w) } proc LZW::decode {cdata} { variable chars set dict $chars set k [lindex $cdata 0] set w [lindex $dict $k] set result $w foreach k [lrange $cdata 1 end] { set currSizeDict [llength $dict] if {$k < $currSizeDict} { set entry [lindex $dict $k] } elseif {$k == $currSizeDict} { set entry $w[string index $w 0] } else { error "invalid code ($k) in ($cdata)" } append result $entry lappend dict $w[string index $entry 0] set w $entry } return $result } set s TOBEORNOTTOBEORTOBEORNOT# set e [LZW::encode $s] ;# ==> 84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263 35 set d [LZW::decode $e] ;# ==> TOBEORNOTTOBEORTOBEORNOT# # or expr {$s eq [LZW::decode [LZW::encode $s]]} ;# ==> 1
PT 13-Jun-2003: Is this code compatible with anything? compress(1), gzip(1) etc? Any references relevant to this code?
13-Jun-2003: As it stands, this code is not terribly useful other than as an exercise because
- its not really compatible with anything other than itself
- it returns a list of integers (the indices into the dictionary) rather than a binary compressed string
- the dictionary grows without bound, rather than being adaptive

SEH The code below makes some changes for speed and expresses the results in a simple encoded format rather than a list of integers.
namespace eval ::lzw { proc Compress {data} { if {$data == {}} {return {}} set cpre {} for {set x 0} {$x < 256} {incr x} {set dict([binary format c $x]) $x} set pos 0 set rval {} set string_length_data [string length $data] while {$pos < $string_length_data} { set ch [string index $data $pos] incr pos set ci [array names dict -exact $cpre$ch] if {$ci != {}} { # string in dictionary append cpre $ch } else { set dict($cpre$ch) [array size dict] lappend rval $dict($cpre) set cpre $ch } } lappend rval $dict($cpre) foreach rv $rval { if {$rv == 38} { append rvalEncode "&0;" } elseif {$rv == 59} { append rvalEncode "&1;" } elseif {$rv > 255} { set rv [expr $rv - 254] append rvalEncode "&$rv;" } else { append rvalEncode [binary format c $rv] } } set rvalEncode [string map {;& { }} $rvalEncode] puts "compressed from [string length $data] to [string length $rvalEncode]" return $rvalEncode } proc Decompress {cdataEncode} { if {$cdataEncode == {}} {return {}} set string_length_cdataEncode [string length $cdataEncode] set pos 0 while {$pos < $string_length_cdataEncode} { set strIndex [string index $cdataEncode $pos] if {$strIndex == "&"} { while {[set strIndex [string index $cdataEncode [incr pos]]] != "\;"} { if {$strIndex == { }} { if {$cDatum == 0} { set cDatum 38 } elseif {$cDatum == 1} { set cDatum 59 } else { set cDatum [expr $cDatum + 254] } lappend cdata $cDatum unset cDatum } append cDatum $strIndex } if {$cDatum == 0} { set cDatum 38 } elseif {$cDatum == 1} { set cDatum 59 } else { set cDatum [expr $cDatum + 254] } lappend cdata $cDatum unset cDatum } else { binary scan $strIndex c strIndex lappend cdata $strIndex } incr pos } set cpre {} set dict {} for {set x 0} {$x < 256} {incr x} {lappend dict [binary format c $x]} set pos 0 set rval {} set llength_cdata [llength $cdata] while {$pos < $llength_cdata} { set co [lindex $cdata $pos] incr pos if {$co >= [llength $dict]} { lappend dict $cpre[string index $cpre 0] set cpre [lindex $dict $co] } else { append cpre [string index [lindex $dict $co] 0] # this only won't apply for the very first character if {[string length $cpre] > 1} { lappend dict $cpre } set cpre [lindex $dict $co] } append rval [lindex $dict $co] } puts "uncompressed from [llength $cdata] to [string length $rval]" return $rval } } # end namespace eval ::lzw