xk2600: I swear I had seen this documented somewhere on the wiki, but I can't find the reference, so I added this page. Feel free to update it, add additional options, etc.
The problem: you have a string that needs to be split on a delimiter. What is the fastest way to accomplish this, especially when the string is exceptionally long or variable in length?
testStringSplittingOptions
# USAGE: testStringSplittingOptions ?delims? tokens iterations
#
# Benchmarks three ways of splitting a delimiter-joined string ([split],
# [regexp -inline -all] and a [scan]-driven loop) and prints one line of
# timings (microseconds per iteration, as reported by [time]) for every
# combination of iteration count and delimiter.
#
#   delims     - optional list of delimiters to test with. Each delimiter is
#                expected to be a single character that has no special
#                meaning inside a regexp bracket expression or a scan
#                charset. Defaults to "&", space and newline.
#   tokens     - quantity of random "varN=M" tokens joined with each delimiter
#                (one fixed "var0=0" token is always prepended).
#   iterations - upper bound for the iteration count handed to [time]; the
#                benchmark is run for 1, 10, 100, ... up to this bound.
#
# The two-argument form (tokens iterations) is still accepted.
# Side effect: (re)defines the helper command "baseline" in the current
# namespace; it is used to measure the cost of an empty proc call.
#
proc testStringSplittingOptions {args} {
    # Accept both the historical two-argument form and the documented form
    # with a leading delimiter list.
    switch -- [llength $args] {
        2 {
            lassign $args tokens iterations
            set delims [list & { } \n]
        }
        3 {
            lassign $args delims tokens iterations
        }
        default {
            return -code error "wrong # args: should be \"testStringSplittingOptions ?delims? tokens iterations\""
        }
    }

    # Empty proc used as a reference point for the timings below.
    proc baseline {var1 var2} {
        return
    }

    # Iteration counts to benchmark with: powers of ten up to $iterations.
    set iterationCounts [list]
    for {set i 1} {$i <= $iterations} {set i [expr {$i * 10}]} {
        lappend iterationCounts $i
    }

    # Prepare the tokens that get joined into the test string.
    set testlist [list var0=0]
    for {set i 0} {$i < $tokens} {incr i} {
        lappend testlist var[expr {int(rand()*1000)}]=[expr {int(rand()*100000)}]
    }

    foreach iteration $iterationCounts {
        foreach separator $delims {
            puts -nonewline [format {iteration: %6d } $iteration]
            puts -nonewline [format {separator: %2s } [string map {\n LF} $separator]]

            # Join the tokens with the separator and report the string length.
            set teststring [join $testlist $separator]
            puts -nonewline [format {strlen: %6d } [string length $teststring]]

            # [time] returns "N microseconds per iteration"; element 0 is the
            # (possibly fractional) number, which must not be truncated.
            set procTime [time { baseline test test2 } $iteration]
            puts -nonewline [format {proc: %8f } [lindex $procTime 0]]
            flush stdout

            # split ####################################################
            set splitTime [time {
                split $teststring $separator
            } $iteration]
            puts -nonewline [format {split: %8f } [lindex $splitTime 0]]
            flush stdout

            # inline-re ################################################
            # Match each run of non-separator characters. No capture group
            # and no trailing separator, so the result holds every token
            # exactly once, including the last one.
            set regexpTime [time {
                regexp -inline -all -- "\[^$separator\]+" $teststring
            } $iteration]
            puts -nonewline [format {inline-re: %8f } [lindex $regexpTime 0]]
            flush stdout

            # scan #####################################################
            # Repeatedly scan one token off the front of the remaining
            # string; %n yields the number of characters consumed, and the
            # extra 1 skips the single-character separator.
            set scanTime [time {
                set end [string length $teststring]
                set token {}
                set cursor 0
                set c 0
                while {$cursor < $end} {
                    lassign [scan [string range $teststring $cursor end] "%\[^$separator\]%n$separator"] token c
                    incr cursor [expr {$c + 1}]
                }
            } $iteration]
            puts [format {scan: %8f } [lindex $scanTime 0]]
            flush stdout

            puts {#################################################################}
            puts ""
        }
    }
}
% testStringSplittingOptions [list & { } \n] 1000 1000
iteration: 1 separator: & strlen: 12802 proc: 36.000000 split: 172.000000 inline-re: 3143.000000 inline-re: 38266.000000
iteration: 1 separator: strlen: 12802 proc: 2.000000 split: 59.000000 inline-re: 2059.000000 inline-re: 34889.000000
iteration: 1 separator: LF strlen: 12802 proc: 1.000000 split: 77.000000 inline-re: 1933.000000 inline-re: 34756.000000
iteration: 10 separator: & strlen: 12802 proc: 0.900000 split: 73.000000 inline-re: 1985.800000 inline-re: 35495.700000
iteration: 10 separator: strlen: 12802 proc: 0.800000 split: 77.800000 inline-re: 2006.200000 inline-re: 34469.900000
iteration: 10 separator: LF strlen: 12802 proc: 0.700000 split: 84.300000 inline-re: 2011.000000 inline-re: 34777.700000
iteration: 100 separator: & strlen: 12802 proc: 0.570000 split: 67.090000 inline-re: 1980.850000 inline-re: 34749.980000
iteration: 100 separator: strlen: 12802 proc: 0.790000 split: 66.790000 inline-re: 1970.210000 inline-re: 34732.560000
iteration: 100 separator: LF strlen: 12802 proc: 0.720000 split: 66.380000 inline-re: 1981.890000 inline-re: 35311.660000
iteration: 1000 separator: & strlen: 12802 proc: 0.381000 split: 66.803000 inline-re: 2005.331000 inline-re: 34960.505000
iteration: 1000 separator: strlen: 12802 proc: 0.446000 split: 68.072000 inline-re: 1998.458000 inline-re: 34863.826000
iteration: 1000 separator: LF strlen: 12802 proc: 0.453000 split: 66.062000 inline-re: 2004.961000 inline-re: 34810.200000