- Bit vectors for compact (non-sparse) sets
- Octet-packed integers store any integers very economically, but may be slow
proc uis'add {uisName number} { # add a number to a uis, if it's not yet contained if {$number < 0 || $number > 65535} { error "can't convert $number to Unicode" } upvar 1 $uisName uis set c [format %c $number] if {[string first $c $uis] < 0} {append uis $c} set number } proc uis'remove {uisName number} { # remove a number from a uis upvar 1 $uisName uis set pos [string first [format %c $number] $uis] set uis [string replace $uis $pos $pos] set number } proc uis'has {uis number} { # return 1 if the uis contains the number, else 0 expr {[string first [format %c $number] $uis] >= 0} } proc uis'list {uis} { # converts a uis to a conventional numbers list set res {} foreach char [split $uis ""] {lappend res [scan $char %c]} set res } proc uis'fromList {list} { # converts a numbers list to a uis set res "" foreach number $list {uis'add res $number} set res } interp alias {} uis'cardinality {} string length#------- The classic binary operators work without scan or format:
proc uis'intersection {a b} { set res "" foreach c [split $a ""] { if {[string first $c $b]>=0} {append res $c} } set res } proc uis'union {a b} { set res $a foreach c [split $b ""] { if {[string first $c $a]<0} {append res $c} } set res } proc uis'difference {a b} { set res "" foreach c [split $a ""] { if {[string first $c $b]<0} {append res $c} } set res }# Testing, especially memory consumption:
proc uis'test {} { foreach n {10 100 1000 10000} { set set "" for {set i 0} {$i<$n} {incr i} { lappend set [expr {int(rand()*65536)}] } puts [time {set uis [uis'fromList $set]}] set listl [string length $set] set uisl [string length $uis] puts "($n) $uisl/$listl = [expr {100.*$uisl/$listl}] %" } }if 0 {With slight variations, the string length of the uis is between 16 and 19 % of the straightforward integer list:
634 microseconds per iteration (10) 10/59 = 16.9491525424 % 20597 microseconds per iteration (100) 100/583 = 17.1526586621 % 236866 microseconds per iteration (1000) 991/5826 = 17.0099553725 % 5420460 microseconds per iteration (10000) 9270/58325 = 15.8936990999 %But of course, string operations on longer sets take its toll. So the uis approach may be recommended if you have many sparse integer sets, while not operating on them too often...Afterthought: Without scan or format, the binary operations can be used on sets of characters too (that's all they do). But it's late Sunday night, so that's it for now ;-)Second afterthought: if you want to store lists with possibly repeated integers, just drop the lsearches above. One should still check the argument to format, as negative integers may even produce two bogus numbers (when the error is commented out):
% uis'list [uis'fromList {1 0 -1}] 1 0 255 191}
Arts and crafts of Tcl-Tk programming