Updated 2011-08-31 12:26:18 by RLE

SEH A delta virtual filesystem.

This virtual filesystem is designed to be used with a versioning virtual filesystem stacked on top of it. As the versioning filesystem generates a new separate file for every file edit, this filesystem will invisibly generate and manage deltas of the separate versions to save space.

It is designed to be used with the procedures posted on the tdelta page, but you can substitute the delta technology of your choice simply by overloading the tdelta and tpatch procedures.

The code includes logic to keep the new file intact if its delta with the existing file turns out to be bigger, or if the two versions are so different that the expense of generating a delta seems unwarranted (i.e., it won't waste time generating deltas of two jpeg or zip file versions if they have almost no content in common).
 # deltavfs.tcl --
 # 
 #        A delta virtual filesystem.
 #
 #        This virtual filesystem is designed to be used with the versioning virtual filesystem 
 #        at http://wiki.tcl.tk/11980 stacked on top if it.  As the versioning filesystem generates 
 #        a new separate file for every file edit, this filesystem will invisibly generate and manage
 #        deltas of the separate versions to save space.
 #
 # Written by Stephen Huntley (stephen.huntley@alum.mit.edu)
 #
 # Install: This code requires that the template vfs (http://wiki.tcl.tk/11938) procedures have already
 # been sourced into the interpreter.  The procedures tpatch and tdelta provided by sourcing tdelta.tcl 
 # at http://wiki.tcl.tk/12803 must also be available.
 # 
 # Usage: Mount <existing directory> <delta directory to be used as existing directory by versioning vfs>
 #

 package require vfs 1

 namespace eval ::vfs::template::version::delta {}

 proc ::vfs::template::version::delta::Mount {args} {
        eval [info body ::vfs::template::Mount]
 }

 namespace eval ::vfs::template::version::delta {

 proc MountProcedure {args} {
        foreach templateProc "Mount Unmount CloseTrace handler" {
                set infoArgs [info args ::vfs::template::$templateProc]
                set infoBody [info body ::vfs::template::$templateProc]
                proc $templateProc $infoArgs $infoBody
        }

        if {[lindex $args 0] == "-volume"} {
                set args [lrange $args 1 end]
                set to [lindex $args 1]
        } else {
                set to [file normalize [lindex $args 1]]
        }
        set path [file normalize [lindex $args 0]]
        if [info exists ::vfs::_unmountCmd($to)] {::vfs::unmount $to}

        file mkdir $path
        array unset ::vfs::_unmountCmd $to
        lappend pathto $path
        lappend pathto $to
        return $pathto
 }

 proc UnmountProcedure {path to} {
        return
 }

 proc Access {path root relative actualpath mode} {
        set fileNames [glob -nocomplain -path [file join $path $relative] *]
        if [catch {::vfs::filesystem info $path}] {append fileNames " [glob -nocomplain -path [file join $path $relative] -type hidden *]"}
        set fileName [lindex $fileNames 0]
        if [set i [expr [lsearch -exact $fileNames [file join $path $relative]] + 1]] {set fileName [lindex $fileNames [incr i -1]]}

        set modeString [::vfs::accessMode $mode]
        if {$modeString == "F"} {
                if [file exists $fileName] {return}
                error "no such file or directory"
        }
        set modeString [split $modeString {}]
        set fileString {}
        if {[string equal $modeString "R"] && [file readable $fileName]} {return}
        if {[string equal $modeString "W"] && [file writable $fileName]} {return}
        if {[string equal $modeString "X"] && [file executable $fileName]} {return}
        file stat $fileName stat
        foreach { mask pairs } {
                00400 { 00400 r }
                00200 { 00200 w }
                04100 { 04100 s 04000 S 00100 x }
                00040 { 00040 r }
                00020 { 00020 w }
                02010 { 02010 s 02000 S 00010 x }
                00004 { 00004 r }
                00002 { 00002 w }
                01001 { 01001 t 01000 T 00001 x }
            } {
                set value [expr $stat(mode) & $mask]
                set bit -
                foreach { x b } $pairs {
                    if { $value == $x } {
                        set bit $b
                    }
                }
                append bitString $bit
      }
        set readable [regexp -all "r" $bitString]
        set writable [regexp -all "w" $bitString]
        set executable [regexp -all "x" $bitString]
        foreach {mode count} "R $readable W $writable X $executable" {
                if {([string first $mode $modeString] > -1) && !$count} {error "$mode access not allowed"}
        }
        if [string equal $modeString "X W"] {
                if {($writable == 3) && ($executable == 3)} {
                        return
                } elseif [file writable $fileName] {
                        if {[regexp -all "wx" $bitString] == $writable} {
                                return
                        } elseif [file executable $fileName] {
                                return
                        }
                }
        }
        if [string equal $modeString "R W"] {
                if {($writable == 3) && ($readable == 3)} {
                        return
                } elseif [file writable $fileName] {
                        if {[regexp -all "rw" $bitString] == $writable} {
                                return
                        } elseif [file readable $fileName] {
                                return
                        }
                }
        }
        if [string equal $modeString "R X"] {
                if {($readable == 3) && ($executable == 3)} {
                        return
                } elseif [file executable $fileName] {
                        if {[regexp -all {r[w-]x} $bitString] == $executable} {
                                return
                        } elseif [file readable $fileName] {
                                return
                        }
                }
        }

        foreach mS $modeString {
                set errorMessage "not [string map {R readable W writable X executable} $mS]"
                if {[lsearch $fileString $mS] == -1} {error $errorMessage}
        }
 }

 proc CreateDirectory {path root relative actualpath} {
        file mkdir [file join $path $relative]
 }

 proc DeleteFile {path root relative actualpath} {
        set fileName [file join $path $relative]
        set timeStamp [lindex [split [file tail $fileName] \;] 1]
        if [string equal $timeStamp {}] {
                catch {file delete $fileName} result
                return
        }
        set targetFile [Reconstitute $fileName $path]
        set referenceFiles [glob -directory [file dirname $fileName] -nocomplain *vfs&delta$timeStamp]
        if [catch {::vfs::filesystem info $path}] {append referenceFiles " [glob -directory [file dirname $fileName] -nocomplain -type hidden *vfs&delta$timeStamp]"}
        foreach referenceFile $referenceFiles {
                regsub {\;vfs&delta[0-9]*$} $referenceFile "" reconFile]
                set f [open $referenceFile r]
                fconfigure $f -translation binary
                set signature [read $f]
                close $f
                tpatch $targetFile $signature $reconFile
                file delete $referenceFile
        }
        close $targetFile
        file delete $fileName
        return
 }

 proc FileAttributes {path root relative actualpath} {
        set fileNames [glob -nocomplain -path [file join $path $relative] *]
        if [catch {::vfs::filesystem info $path}] {append fileNames " [glob -nocomplain -path [file join $path $relative] -type hidden *]"}
        set fileName [lindex $fileNames 0]
        if [set i [expr [lsearch -exact $fileNames [file join $path $relative]] + 1]] {set fileName [lindex $fileNames [incr i -1]]}
        file attributes $fileName
 }

 proc FileAttributesSet {path root relative actualpath attribute value} {
        set fileNames [glob -nocomplain -path [file join $path $relative] *]
        if [catch {::vfs::filesystem info $path}] {append fileNames " [glob -nocomplain -path [file join $path $relative] -type hidden *]"}
        set fileName [lindex $fileNames 0]
        if [set i [expr [lsearch -exact $fileNames [file join $path $relative]] + 1]] {set fileName [lindex $fileNames [incr i -1]]}
        file attributes $fileName $attribute $value
 }

 proc MatchInDirectory {path root relative actualpath pattern types} {
        if [::vfs::matchDirectories $types] {lappend typeString d}
        if [::vfs::matchFiles $types] {lappend typeString f}
        
        set globList [glob -directory [file join $path $relative] -nocomplain -types $typeString $pattern]
        if [catch {::vfs::filesystem info $path}] {append globList " [glob -directory [file join $path $relative] -nocomplain -types "$typeString hidden" $pattern]"}
        set pathLength [expr [string length $path] - 1]
        set newGlobList {}
        foreach gL $globList {
                set gL [string replace $gL 0 $pathLength $root]
                regsub {\;vfs&delta.*$} $gL "" gL
                lappend newGlobList $gL
        }
        return $newGlobList
 }

 proc Open {path root relative actualpath mode permissions} {
        set fileNames [glob -nocomplain -path [file join $path $relative] *]
        if [catch {::vfs::filesystem info $path}] {append fileNames " [glob -nocomplain -path [file join $path $relative] -type hidden *]"}
        set fileName [lindex $fileNames 0]
        if [set i [expr [lsearch -exact $fileNames [file join $path $relative]] + 1]] {set fileName [lindex $fileNames [incr i -1]]}

        set newFile 0
        if ![file exists $fileName] {set newFile 1}
        set fileName [file join $path $relative]
        set channelID [Reconstitute $fileName $path]
        if [string equal $channelID {}] {set channelID [open $fileName $mode] ; close $channelID ; set channelID [::vfs::memchan]}
        if $newFile {catch {file attributes $fileName -permissions $permissions}}
        return $channelID
 }

 proc Close {channelID path root relative actualpath mode} {
 # Do not close the channel in the close callback!
 # It will crash Tcl!  Honest!
 # The core will close the channel after you've taken what info you need from it.

 #        close $channelID
        if {$mode == "r"} {return}
        set fileName [file join $path $relative]
        set f [open $fileName w]
        fconfigure $f -translation binary
        seek $f 0
        seek $channelID 0
        fcopy $channelID $f
        close $f
        Delta $fileName $path
        return
 }

 proc RemoveDirectory {path root relative actualpath} {
        catch {file delete [file join $path $relative]}
 }

 proc Stat {path root relative actualpath} {
        set fileNames [glob -nocomplain -path [file join $path $relative] *]
        if [catch {::vfs::filesystem info $path}] {append fileNames " [glob -nocomplain -path [file join $path $relative] -type hidden *]"}
        set fileName [lindex $fileNames 0]
        if [set i [expr [lsearch -exact $fileNames [file join $path $relative]] + 1]] {set fileName [lindex $fileNames [incr i -1]]}
        
        set endtag [lindex [split $fileName \;] end]
        if {[string first "vfs&delta" $endtag] || [string equal "vfs&delta" $endtag]} {file stat $fileName fs ; return [array get fs]}
        set f [open $fileName r]
        fconfigure $f -translation binary
        set copyinstructions [read $f]
        array set fileStats [lindex $copyinstructions 3]
        unset copyinstructions
        close $f
        set size $fileStats(size)
        file stat $fileName fs
        set fs(size) $size
        return [array get fs]
 }

 proc Utime {path root relative actualpath atime mtime} {
        set fileNames [glob -nocomplain -path [file join $path $relative] *]
        if [catch {::vfs::filesystem info $path}] {append fileNames " [glob -nocomplain -path [file join $path $relative] -type hidden *]"}
        set fileName [lindex $fileNames 0]
        if [set i [expr [lsearch -exact $fileNames [file join $path $relative]] + 1]] {set fileName [lindex $fileNames [incr i -1]]}

        file atime $fileName $atime
        file mtime $fileName $mtime
 }

 proc Delta {filename path} {
        set fileRoot [lindex [split [file tail $filename] \;] 0]
        set fileNames [glob -nocomplain -path [file join [file dirname $filename] $fileRoot] *]
        if [catch {::vfs::filesystem info $path}] {append fileNames " [glob -nocomplain -path [file join [file dirname $filename] $fileRoot] -type hidden *]"}
        set nonDeltas {}
        foreach fn $fileNames {
                set endtag [lindex [split $fn \;] end]
                if ![string first "vfs&delta" $endtag] {continue}
                lappend nonDeltas $fn
                set atimes($fn) [file atime $fn]
        }
        if {[set deltaIndex [llength $nonDeltas]] < 2} {return}
        set nonDeltas [lsort -dictionary $nonDeltas]
        incr deltaIndex -1
        set i 0
        while {$i < $deltaIndex} {
                set referenceFile [lindex $nonDeltas $i]
                set targetFile [lindex $nonDeltas [incr i]]
                set signature [tdelta $referenceFile $targetFile $::trsync::blockSize 1 1]
                set targetTimeStamp [lindex [split $targetFile \;] 1]

                file stat $referenceFile fileStats
                set signatureSize [string length $signature]
                if {$signatureSize > $fileStats(size)} {
                        set fileName $referenceFile\;vfs&delta
                        file rename $referenceFile $fileName
                        continue
                }

                array set fileStats [file attributes $referenceFile]

                set fileName $referenceFile\;vfs&delta$targetTimeStamp
                set f [open $fileName w]
                fconfigure $f -translation binary
                puts -nonewline $f $signature
                close $f
                file delete $referenceFile
                array set fileAttributes [file attributes $fileName]
                if [info exists fileAttributes(-readonly)] {catch {file attributes $fileName -readonly 0}}
                if [info exists fileAttributes(-permissions)] {catch {file attributes $fileName -permissions rw-rw-rw-}}
                catch {file attributes $fileName -owner $fileStats(uid)}
                catch {file attributes $fileName -group $fileStats(gid)}
                
                catch {file mtime $fileName $fileStats(mtime)}
                catch {file atime $fileName $fileStats(atime)}

                foreach attr [array names fileStats] {
                        if [string first "-" $attr] {continue}
                        if [string equal [array get fileStats $attr] [array get fileAttributes $attr]] {continue}
                        if [string equal "-permissions" $attr] {continue}
                        catch {file attributes $fileName $attr $fileStats($attr)}
                }
                catch {file attributes $fileName -permissions $fileStats(mode)}
                catch {file attributes $fileName -readonly $fileStats(-readonly)}
        }
        foreach fn [array names atimes] {
                if ![file exists $fn] {continue}
                file atime $fn $atimes($fn)
        }
 }

 proc Reconstitute {fileName path} {
        if ![catch {set channelID [open $fileName r]}] {return $channelID}
        if ![catch {set channelID [open $fileName\;vfs&delta r]}] {return $channelID}
        set targetFiles [glob -nocomplain -path $fileName *]
        if [catch {::vfs::filesystem info $path}] {append targetFiles " [glob -nocomplain -path $fileName -type hidden *]"}
        set targetFile [lindex $targetFiles 0]

        set targetFile [string trim $targetFile]
        if [string equal $targetFile {}] {return}
         set fileStack {}
        while {[string first "\;vfs&delta" $targetFile] > -1} {
                if ![regexp {\;vfs&delta([0-9]+)$} $targetFile trash targetTime] {break}
                set fileStack "[list $targetFile] $fileStack"
                set targetFiles [glob -directory [file dirname $fileName] *\;$targetTime*]
                if [catch {::vfs::filesystem info $path}] {append targetFiles " [glob -directory [file dirname $fileName] -nocomplain -type hidden *\;$targetTime*]"}
                set targetFile [lindex $targetFiles 0]

                set atimes($targetFile) [file atime $targetFile]
        }
        set targetFile [open $targetFile r]
        foreach fs $fileStack {
                set f [open $fs r]
                fconfigure $f -translation binary
                set copyInstructions [read $f]
                close $f
                set fileToConstruct [::vfs::memchan]
                tpatch $targetFile $copyInstructions $fileToConstruct
                catch {close $targetFile}
                set targetFile $fileToConstruct
        }
        foreach fn [array names atimes] {
                file atime $fn $atimes($fn)
        }
        fconfigure $targetFile -translation auto
        seek $targetFile 0
        return $targetFile
 }

 }
 # end namespace eval ::vfs::template::version::delta

See Also: