- RFC 1950 [1] - Zlib Compressed Data Format
- RFC 1951 [2] - Deflate Compressed Data format
- RFC 1952 [3] - Gzip File Format
gunzip a file with zlib and Tcl
proc gunzip { file {outfile ""} } { package require zlib # Gunzip the file # See http://www.gzip.org/zlib/rfc-gzip.html for gzip file description set in [open $file r] fconfigure $in -translation binary -buffering none set id [read $in 2] if { ![string equal $id \x1f\x8b] } { error "$file is not a gzip file." } set cm [read $in 1] if { ![string equal $cm \x8] } { error "$file: unknown compression method" } binary scan [read $in 1] b5 FLAGS puts $FLAGS foreach {FTEXT FHCRC FEXTRA FNAME FCOMMENT} [split $FLAGS ""] {} binary scan [read $in 4] i MTIME set XFL [read $in 1] set OS [read $in 1] if { $FEXTRA } { binary scan [read $in 2] S XLEN set ExtraData [read $in $XLEN] } set name "" if { $FNAME } { set XLEN 1 set name [read $in $XLEN] set c [read $in 1] while { $c != "\x0" } { append name $c set c [read $in 1] } } set comment "" if { $FCOMMENT } { set c [read $in 1] while { $c != "\x0" } { append comment $c set c [read $in 1] } } set CRC16 "" if { $FHCRC } { set CRC16 [read $in 2] } set cdata [read $in] close $in binary scan [string range $cdata end-7 end] ii CRC32 ISIZE set data [zlib inflate [string range $cdata 0 end-8]] if { $CRC32 != [zlib crc32 $data] } { error "gunzip Checksum mismatch." } if { $outfile == "" } { set outfile $file if { [string equal -nocase [file extension $file] ".gz"] } { set outfile [file rootname $file] } } if { [string equal $outfile $file] } { error "Will not overwrite input file. sorry." } set out [open $outfile w] fconfigure $out -translation binary -buffering none puts -nonewline $out $data close $out file mtime $outfile $MTIME }18Aug04 PS
LES: Would someone tell me HOW this is better than [exec gzip filename]?PS: gzip might not be installed? And with a small tweak, you'd just get the file content - put that together with vfs::tar and you might be able to mount tar.gz files...SRIV Stock Windows installs don't have gzip. Think cross platform. "Better" is in the eye of the beholder. Nice work.
DKF: Here's a cheap way to invoke gzip on Windows. Note that just using exec gzip -c <<$d does not work because of translation issues.
proc gzip d { set data [open foo.tmp w] fconfigure $data -translation binary puts -nonewline $data $d close $data set f [open "|gzip -c <foo.tmp" r] fconfigure $f -translation binary set d [read $f] close $f after 100 ;# Ugly hack to give gzip time to exit so we can kill foo.tmp on Windows file delete foo.tmp return $d }
14Dec06 gl : Here is a way to read and write gzip .gz files transparently and on the fly -- including channels like "stdin" -- with the help of Trf:
package require Trf namespace eval gz { variable CRC # Attach to a channel for writing -- i.e., write .gz header, enable compression proc attach_w {f} { l for writing -- i.e., write .gz header, enable compression # Write header puts -nonewline $f [binary format "H*iH*" "1f8b0800" [clock seconds] "0003"] set CRC [binary format x4] # Init/attach compression zip -attach $f -mode compress -nowrap 1 fconfigure $f -translation binary -encoding binary # Init/attach CRC crc-zlib -attach $f -mode transparent -write-destination ::gz::CRC($f) -write-type variable fconfigure $f -translation binary -encoding binary fconfigure $f -translation binary -encoding binary return $f } # Detach from a channel for writing -- i.e., write .gz footer incl. CRC proc detach_w {f} { variable CRC set SIZE [tell $f] unstack $f ; # CRC unstack $f ; # gzip puts -nonewline $f $CRC($f) puts -nonewline $f [binary format "i" [expr $SIZE % 0x100000000]] } # Attach to a channel for reading -- i.e., read and check .gz header, enable decompression proc attach_r {f} { # (using code from [http://wiki.tcl.tk/6175]) set id [read $f 2] if { ![string equal $id \x1f\x8b] } { error "GZip channel $f: not a gzip file." } set cm [read $f 1] if { ![string equal $cm \x8] } { error "GZip channel $file: unknown compression method." } binary scan [read $f 1] b5 FLAGS puts $FLAGS foreach {FTEXT FHCRC FEXTRA FNAME FCOMMENT} [split $FLAGS ""] {} binary scan [read $f 4] i MTIME set XFL [read $f 1] set OS [read $f 1] if { $FEXTRA } { binary scan [read $f 2] S XLEN set ExtraData [read $f $XLEN] } set name "" if { $FNAME } { set c [read $f 1] while { $c != "\x0" } { append name $c set c [read $f 1] } } set comment "" if { $FCOMMENT } { set c [read $f 1] while { $c != "\x0" } { append comment $c set c [read $f 1] } } set CRC16 "" if { $FHCRC } { set CRC16 [read $f 2] } # Init/attach decompression zip -attach $f -mode compress -nowrap 1 fconfigure $f -translation binary -encoding binary # Init/attach CRC crc-zlib -attach $f -mode transparent -read-destination ::gz::CRC($f) -read-type variable fconfigure $f -translation binary -encoding binary return $f } # Detach from a channel for reading -- i.e., check .gz footer incl. CRC proc detach_r {f} { nel for reading -- i.e., check .gz footer incl. CRC variable CRC set cmpSize [expr [tell $f] % 0x100000000] binary scan $CRC($f) i cmpCRC unstack $f ; # CRC unstack $f ; # gzip binary scan [read $f 4] i gzCRC binary scan [read $f 4] i gzSize binary scan [read $f 4] i gzSize if {$gzCRC != $cmpCRC} { error "GZip channel $f: CRC mismatch." } if {$gzSize != $cmpSize} { error "GZip channel $f: Size mismatch." } } } # Demo program: # Decompress to stdout: gztest.tcl filename.gz # Compress to stdout: gztest.tcl filename if {[llength $argv] != 1} { puts "Usage: gztest.tcl filename" exit -2 } set fn [lindex $argv 0] set f [open $fn r] fconfigure $f -translation binary if {[string match "*.gz" $fn]} { # Is .gz ::gz::attach_r $f fcopy $f stdout ::gz::detach_r $f } else { # Is not .gz ::gz::attach_w stdout fcopy $f stdout ::gz::detach_w stdout }
To save people headaches working out how to do this for HTTP streams:
set gzip [binary format "H*iH*" "1f8b0800" [clock seconds] "0003"] append gzip [zlib deflate $content] append gzip [binary format i [zlib crc32 $content]] append gzip [binary format i [string length $content]] set content-encoding gzip(CMcC modded this snipped 10Jul07 after much head-scratching and some experimentation)BAS I was able to send gzipped content by doing:
fconfigure $chan -translation binary set gzip [zlib gzip $content -header [list crc [zlib crc32 $content] time [clock seconds] os 3]] set content-encoding gzipos 3 just means it was generated from Unix OS.