This is an improved version of
purse, orginally by
CMcC. The code below attempts to minimise the amount of file descriptors that are kept open at one time. The original implementation would only close file descriptors on array removals, which can be problematic in virtual machines (I've had problems on Ubuntu in OpenVZ-based machines). Closing of file descriptors uses the following two heuristics:
- Journalling files are kept open for few seconds after an initial write to the array, then they get automatically closed. This supposes that your code will "burst" modifications to arrays.
- Only a restricted number of journalling files are kept open at all times. When a maximum is reached, the "oldest" ones are closed. This is usefull if you create and purse many arrays at once, for example when restarting from disk state.
The API is exactly the same as the one of
purse, except that there are more parameters that can be set for a purse (and good defaults, highlighted at the beginning of the code).
I have been also been looking at
tie, which is part of
tcllib. But the implementation also keeps the file descriptors opened forever, to what I can understand.
# purse makes tcl arrays purse-istant
package require Tcl 8.5
package provide Purse 0.3
namespace eval ::purse {
namespace eval vars {
variable nofile 128; # Default max number of journalling file to keep open
variable keep 5000; # Default number of milliseconds to keep journalling files open
variable debug ""; # File descritor for debug output (empty for none)
variable dtfmt "%Y%m%d %T"; # Format for date output in log (empty for none)
}
}
# param - sets a purse's parameters
# dir - directory in which purses are stored (default [pwd])
proc ::purse::param {var val {control purse}} {
variable $control
upvar 0 $control cont
if {![info exists cont]} {
#::purse::purse $control $control
set cont() [dict create dir [pwd]]
}
if {$var == "dir"} {
file mkdir $val
}
dict set cont() $var $val
Debug "Set parameter $var to $val in $control"
}
# Restart from current state on disk
proc ::purse::restart { {glob ::*} {control purse}} {
variable $control
upvar 0 $control cont
init $control
set arrays [glob -directory [dict get $cont() dir] \
-nocomplain \
-tails [string map [dict get $cont() todisk] $glob]]
foreach array $arrays {
set array [FromDisk $array]
# creates namespace ondemand, if necessary
set nspace [namespace qualifiers $array]
if { $nspace ne "" } {
namespace eval $nspace {}
}
R $control $array $array
}
return $arrays
}
# Initialise purse storage in control variable
proc ::purse::init { {control purse} } {
global tcl_platform
variable $control
upvar 0 $control cont
if {![info exists cont]} {
#::purse::purse $control $control
set cont() [dict create]
}
# Make sure we have a "dir" parameter, this will be the directory
# where we will be persisting arrays.
if {![dict exists $cont() dir]} {
param dir [pwd] $control
}
# Make sure we know how to map unallowed characters onto
# characters that can be used for file names.
if { ![dict exists $cont() todisk] } {
if { $tcl_platform(platform) eq "windows" } {
param todisk [list ":" "¨"] $control
} else {
param todisk {} $control
}
}
# The opposite...
if { ![dict exists $cont() fromdisk] } {
if { $tcl_platform(platform) eq "windows" } {
param fromdisk [list "¨" ":"] $control
} else {
param fromdisk {} $control
}
}
if {![dict exists $cont() nofile]} {
param nofile $vars::nofile $control
}
if {![dict exists $cont() keep]} {
param keep $vars::keep $control
}
}
# purse - purses an array
proc ::purse::purse {array {control purse}} {
#puts stderr "purse $array $control"
#set array [namespace which -variable $array]
upvar $array arr
variable $control
upvar 0 $control cont
init $control; # Initialise storage.
trace add variable arr read [list ::purse::R $control $array] ;# one shot load file
trace add variable arr array [list ::purse::R $control $array] ;# one shot load file
trace add variable arr write [list ::purse::W $control $array]
trace add variable arr unset [list ::purse::U $control $array]
# register a single purse flush at exit
set pfx ::exit_purse_[string map [list ":" ""] $control]_
if { [info commands ${pfx}*] eq "" } {
set newex ${pfx}[expr rand()]
rename ::exit $newex
proc ::exit {} [subst {
#puts stderr "flush $control"
::purse::flush * $control
$newex
}]
}
}
# flush arrays matching glob
proc ::purse::flush {{glob *} {control purse}} {
variable $control
upvar 0 $control cont
foreach {array d} [array get cont $glob] {
if { $array != "" } {
Serialize $array $control
}
}
}
# Return full path to where to store an array on disk.
proc ::purse::ToDisk { name {control purse}} {
variable $control
upvar 0 $control cont
init $control
return [file join [dict get $cont() dir] \
[string map [dict get $cont() todisk] $name]]
}
# Return name of array, given its name on disk.
proc ::purse::FromDisk { name {control purse}} {
variable $control
upvar 0 $control cont
init $control
return [string map [dict get $cont() fromdisk] $name]
}
# initializes pursed array - one shot
proc ::purse::R {control name array args} {
#puts stderr "read $control $name $array $args"
upvar $array arr
variable $control
upvar 0 $control cont
trace remove variable arr read [list ::purse::R $control $name] ;# one shot load file
trace remove variable arr array [list ::purse::R $control $name] ;# one shot load file
trace remove variable arr write [list ::purse::W $control $name]
trace remove variable arr unset [list ::purse::U $control $name]
set file [ToDisk $name $control]
if {[file exists $file]} {
# if the purse exists, load its contents to array
Debug "Loading content of array $name from $file"
set fd [open $file r+]
while {![eof $fd]} {
set content [gets $fd]
set len [llength $content]
if { [expr {$len%2}] } {
puts "Possibly corrupt data when reading $name"
set content [lrange $content 0 end-1]
}
array set arr $content
}
close $fd
Serialize $name $control
} else {
# brand new purse - create the file
Debug "Shadowing content of array $name to $file"
set fd [open $file w]
fconfigure $fd -buffering line
dict set cont($name) fd $fd
Spacer $name $control
AutoClose $name $control
}
# we no longer need a read trace
trace add variable arr write [list ::purse::W $control $name]
trace add variable arr unset [list ::purse::U $control $name]
}
# trace unset - writes an element to purse
proc ::purse::W {control name array el op} {
#puts stderr "write $control $name $array $el $op"
upvar $array arr
variable $control
upvar 0 $control cont
if {![info exists cont($name)]} {
R $control $name arr
}
# Append to existing file, reserialize all content if it had been closed
set fd [dict get $cont($name) fd]
if { $fd eq "" } {
Serialize $name $control
}
set fd [dict get $cont($name) fd]
array set junk [list $el $arr($el)]
puts $fd [array get junk]
Spacer $name $control
AutoClose $name $control
}
# trace unset - unsets an element in a pursed array
proc ::purse::U {control name array el op} {
#puts stderr "unset $control $name $array $el $op"
upvar $array arr
variable $control
upvar 0 $control cont
if {![info exists cont($name)]} {
r $control $name arr
if {$el != "" && [info exists arr($el)]} {
unset arr($el) ;# we have recreated the element - recurse
}
return
}
if {$el == ""} {
# removing the entire array - destroy the purse
set file [ToDisk $name $control]
CloseCmd $name $control
unset cont($name)
if {[file exists $file]} {
file delete -force -- $file
}
# we no longer need traces
trace remove variable arr write [list ::purse::W $control $name]
trace remove variable arr unset [list ::purse::U $control $name]
} else {
# removing an element - flush the purse
Serialize $name $control
}
}
# Conditional debug output, this cost close to zero.
proc ::purse::Debug {msg} {
if {$vars::debug ne ""} {
if { $vars::dtfmt eq "" } {
puts $vars::debug $msg
} else {
set dt [clock format [clock seconds] -format $vars::dtfmt]
puts $vars::debug "\[$dt\] $msg"
}
}
}
# Serialize the content of an array to the journalling file, i.e. write a
# complete copy of the array, loosing the history.
proc ::purse::Serialize {array {control purse}} {
variable $control
upvar 0 $control cont
upvar $array arr
# Close current file descriptor to journalling file
catch {
set fd [dict get $cont($array) fd]
close $fd
}
# Write complete copy of current content of array to the file
set file [ToDisk $array $control]
Debug "Serialize $array to $file"
set fd [open $file w]
fconfigure $fd -buffering line
puts $fd [array get arr]
::flush $fd
dict set cont($array) fd $fd; # Remember the file descriptor
# Make space for the array by closing older arrays and arrange to close the
# journal after a while.
Spacer $array $control
AutoClose $array $control
}
# Record latest write timestamp to an array, this is used to automatically close
# journalling files of arrays that we haven't written to for a while.
proc ::purse::LatestWrite { name {control purse}} {
variable $control
upvar 0 $control cont
set nofile [dict get $cont() nofile]
if { $nofile > 0 } {
dict set cont($name) access [clock clicks]
}
}
# Make space to write to an array, this will close the journalling files of the
# arrays that we haven't written to for a while, or rather the "oldest" ones in
# that list.
proc ::purse::Spacer { name { control purse} } {
variable $control
upvar 0 $control cont
set nofile [dict get $cont() nofile]
if { $nofile > 0 } {
# Make sure we keep the array that we want to make space for as the
# latest accessed one.
LatestWrite $name $control
# Construct a list with the names of the arrays that are under our
# control, paired to their last access time.
set accesses {}
foreach nm [array names cont] {
if {$nm ne ""} {
if {[dict exists $cont($nm) access]} {
lappend accesses [list $nm [dict get $cont($nm) access]]
}
}
}
# Sort the list so that latest accessed are first.
set accesses [lsort -index 1 -integer -decreasing $accesses]
# Remove all the ones that are at the end of the list, which now are the
# oldest ones.
set removals [lrange $accesses $nofile end]
if { [llength $removals] > 0 } {
Debug "Making space for array $name among the oldest ones"
foreach nfo $removals {
foreach {nm access} $nfo break
CloseCmd $nm $control
}
}
}
}
# Arrange to automatically close journalling file after a while, if relevant for
# the purse options.
proc ::purse::AutoClose { name {control purse}} {
variable $control
upvar 0 $control cont
# Cancel current timer, if any
if { [dict exists $cont($name) timer] } {
after cancel [dict get $cont($name) timer]
dict unset cont($name) timer
}
# Arrange to close the journalling file to which we write in a little while
set period [dict get $cont() keep]
if { $period > 0 } {
dict set cont($name) \
timer [after $period [list [namespace current]::CloseCmd $name $control]]
}
}
# Forcedly close the journalling file for an array at once. This is to ensure
# that we can keep low the number of resources alloted to the program as a
# whole.
proc ::purse::CloseCmd { name {control purse}} {
variable $control
upvar 0 $control cont
# Return at once in case we don't have any information for the array
# (anymore?)
if {![info exists cont($name)]} {
return
}
# Cancel current timer, if any
if { [dict exists $cont($name) timer] } {
after cancel [dict get $cont($name) timer]
dict unset cont($name) timer
}
# Close the file to which we are writing
if { [dict exists $cont($name) fd] } {
set fd [dict get $cont($name) fd]
if { $fd ne "" } {
Debug "Closing journalling output file for array $name"
catch {close $fd}
}
}
# Remember that we are now not writing to any file anymore.
dict set cont($name) fd ""
}
namespace export -clear purse