#! /usr/bin/env tclsh proc count_strmap str { expr {[string length $str]-[string length [string map {\n {}} $str]]} }; proc count_split string { expr {[llength [split $string \n]] - 1} } proc count_split2 {string} { expr {[llength [split $string {\n}]] - 1} } proc count_regexp string { regexp -all \n $string } proc count_regsub string { regsub -all -- {\n} $string {\n} string } proc count_stringfirst {string} { set rc [set ndx 0] while {[set ndx [expr { [string first \n $string $ndx] + 1}]]} { incr rc } return $rc } proc count_splitchar string { foreach x [split $string {}] {if {[catch {incr counters($x)}]} {set counters($x) 1}} return $counters(\n) } proc count_splitchar2 string { set rc 0 foreach x [split $string {}] { if {$x eq "\n"} { incr rc } } return $rc } set string_short line1\nline2\nline3\nline4\n set string_long [string repeat $string_short 100] proc test1 string { puts [list {timings for string of length} [string length $string]] # in order of performance for large strings foreach method { count_strmap count_split count_split2 count_splitchar2 count_stringfirst count_regexp count_regsub count_splitchar } { puts [list $method [time [list $method $string] 10002]] } } test1 $string_short puts {} test1 $string_longPYK 2016-09-15: Historical timings for some procedures are found in the history for this page. Here are the timings on a modern laptop:
{timings for string of length} 24 count_strmap {0.5195960807838432 microseconds per iteration} count_split {0.8080383923215357 microseconds per iteration} count_split2 {0.8676264747050589 microseconds per iteration} count_splitchar2 {2.4941011797640473 microseconds per iteration} count_stringfirst {2.005398920215957 microseconds per iteration} count_regexp {1.959608078384323 microseconds per iteration} count_regsub {2.5668866226754647 microseconds per iteration} count_splitchar {16.570585882823437 microseconds per iteration} {timings for string of length} 2400 count_strmap {7.925714857028594 microseconds per iteration} count_split {23.276944611077784 microseconds per iteration} count_split2 {29.25474905018996 microseconds per iteration} count_splitchar2 {118.79934013197361 microseconds per iteration} count_stringfirst {134.4123175364927 microseconds per iteration} count_regexp {153.87112577484504 microseconds per iteration} count_regsub {165.65566886622676 microseconds per iteration} count_splitchar {335.93011397720454 microseconds per iteration}
RS contributes maybe not the fastest, but for now the shortest variation, to be called with the wanted character and the string as additional arguments:
interp alias {} countCharsA {} regexp -all
Stu 2007-10-13 I've taken the above procs and tried to make them as similar as possible and then run some tests with various lengths of data. Some of the procs only count characters, others can count strings. Some procs have been adjusted for this. Note the difference in speed between splitchar2 and splitchar2a simply by using eq instead of ==. The overall winner is strmap. Also shown in the output is the result of each proc; they should be all the same. If I've gotten your name wrong or you feel I misrepresented you or your proc somehow please make corrections.Example output from my machine:
1 char search, 240000 chars string splitchar 238632 microseconds per iteration (40000) splitchar2 305889 microseconds per iteration (40000) splitchar2a 167124 microseconds per iteration (40000) strmapC 19843 microseconds per iteration (40000) splitline 45872 microseconds per iteration (40000) splitline2 42932 microseconds per iteration (40000) regline 192236 microseconds per iteration (40000) regsubline 32450 microseconds per iteration (40000) strmapS 19131 microseconds per iteration (40000) countCharA 185561 microseconds per iteration (40000) strfirst 61804 microseconds per iteration (40000) ---- 100 chars search, 11900 chars string regline 67589 microseconds per iteration (100) regsubline 544 microseconds per iteration (100) strmapS 138 microseconds per iteration (100) countCharA 64383 microseconds per iteration (100) strfirst 458 microseconds per iteration (100)The procs:
proc splitline {string countChar} { set rc [llength [split $string $countChar]] incr rc -1 return $rc } proc regline {string countString} { set rc [regexp -line -all $countString $string] return $rc } proc splitchar {string countChar} { foreach x [split $string ""] {if {[catch {incr counters($x)}]} {set counters($x) 1}} set rc $counters($countChar) return $rc } # Brett Schwarz (split into char and string versions) proc strmapC {string countChar} { return [expr {[string length $string]-[string length [string map [list $countChar ""] $string]]}] } proc strmapS {string countString} { return [expr {([string length $string]-[string length [string map [list $countString ""] $string]]) / [string length countString]}] } # Richard Suchenwirth interp alias {} countCharsA {} regexp -all # Michael A. Cleverly proc regsubline {string countString} { return [regsub -all -- $countString $string $countString string] } # Miguel Sofer proc splitchar2 {string countChar} { set rc 0 foreach x [split $string ""] { if {$x == $countChar} { incr rc } } return $rc } # Miguel Sofer (== changed to eq) proc splitchar2a {string countChar} { set rc 0 foreach x [split $string ""] { if {$x eq $countChar} { incr rc } } return $rc } # Michael A. Cleverly proc strfirst {string countString} { set rc [set ndx 0] while {[set ndx [expr { [string first $countString $string $ndx] + 1}]]} { incr rc } return $rc }The thing:
proc go {string countString times} { if {[string length $countString] < 2} { puts "splitchar [time {splitchar $string $countString} $times] ([splitchar $string $countString])" puts "splitchar2 [time {splitchar2 $string $countString} $times] ([splitchar2 $string $countString])" puts "splitchar2a [time {splitchar2a $string $countString} $times] ([splitchar2a $string $countString])" puts "strmapC [time {strmapC $string $countString} $times] ([strmapC $string $countString])" puts "splitline [time {splitline $string $countString} $times] ([splitline $string $countString])" puts "splitline2 [time {splitline2 $string $countString} $times] ([splitline2 $string $countString])" } puts "regline [time {regline $string $countString} $times] ([regline $string $countString])" puts "regsubline [time {regsubline $string $countString} $times] ([regsubline $string $countString])" puts "strmapS [time {strmapS $string $countString} $times] ([strmapS $string $countString])" puts "countCharA [time {countCharsA $countString $string} $times] ([countCharsA $countString $string])" puts "strfirst [time {strfirst $string $countString} $times] ([strfirst $string $countString])" } set times 1000 puts "Tcl patchlevel: [info patch]" for {set rep 100} {$rep <= 100} {incr rep 10} { set countString "\n" set string [string repeat "line1\nline2\nline3\nline4\n" $rep] puts "[string length $countString] char search, [string length $string] chars string" go $string $countString $times puts ---- set countString "line" set string [string repeat "line1\nline2\nline3\nline4\n" $rep] puts "[string length $countString] chars search, [string length $string] chars string" go $string $countString $times puts ---- set countString "\n" set string [string repeat [string repeat "line1\nline2\nline3\nline4\n" $rep] $rep] puts "[string length $countString] char search, [string length $string] chars string" go $string $countString $times puts ---- set countString [string repeat hovercraft 10] set string [string repeat "my $countString is full of eels" $rep] puts "[string length $countString] chars search, [string length $string] chars string" go $string $countString $times }