dbohdan 2018-03-26: The following module lets you find the longest common
word prefix of two strings, which is to say, the first
N words the strings have in common. For our purposes
words are defined as string fragments separated by separators. A separator is a string that matches a given regular expression understood by
regexp.
See also edit
Download with
wiki-reaper:
wiki-reaper -x 55230 0 | tee lcwp.tcl#! /usr/bin/env tclsh
# Copyright (c) 2018 dbohdan
# License: MIT
package require Tcl 8.5
namespace eval ::lcwp {
variable version 0.3.0
interp alias {} lcwp {} longest-common-word-prefix
}
proc ::lcwp::longest-common-word-prefix {
s1 s2 {sep {\s+}} {includeTailSep 0}
} {
if {[string length $s2] > [string length $s1]} {
set t $s2
set s2 $s1
set s1 $t
unset t
}
set offset 0
set tailSepLength 0
while 1 {
lassign [read-word $s1 $offset $sep] label1 \
matchedFramement1 \
matchedSep1
lassign [read-word $s2 $offset $sep] label2 \
matchedFramement2 \
matchedSep2
# Handle fragments.
if {$matchedFramement1 ne $matchedFramement2} {
break
}
set fragmentLength [string length $matchedFramement1]
incr offset $fragmentLength
if {$fragmentLength > 0} {
set tailSepLength 0
}
# Handle separators.
if {$matchedSep1 ne $matchedSep2} {
break
}
incr tailSepLength [string length $matchedSep1]
incr offset [string length $matchedSep1]
# Handle string end.
if {$label1 eq {END} || $label2 eq {END}} {
break
}
}
if {!$includeTailSep} {
incr offset -$tailSepLength
}
return [string range $s1 0 $offset-1]
}
proc ::lcwp::read-word {s offset sep} {
if {[regexp -indices -start $offset -- $sep $s match]} {
lassign $match start end
set matchedFramement [string range $s $offset $start-1]
set matchedSep [string range $s $start $end]
set label MORE
} else {
set matchedFramement [string range $s $offset end]
set matchedSep {}
set label END
}
return [list $label $matchedFramement $matchedSep]
}
proc ::lcwp::replace-prefix {prefix s {replacement { }}} {
set prefixLen [string length $prefix]
set replacementLen [string length $replacement]
set repeats [expr {
$replacementLen > 0 ?
$prefixLen / $replacementLen + 1 :
0
}]
set newPrefix [string range [string repeat $replacement $repeats] \
0 \
$prefixLen-1]
return $newPrefix[string range $s $prefixLen end]
}
# If this is the main script, run the tests.
if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} {
package require tcltest
namespace path ::lcwp
tcltest::test common-word-prefix-1.1 {simple case} -body {
lcwp {hello world 1} {hello world 2}
} -result {hello world}
tcltest::test common-word-prefix-1.2 {all the same words} -body {
list [lcwp foo foo] \
[lcwp {hello world} {hello world}]
} -result {foo {hello world}}
tcltest::test common-word-prefix-1.3 {all different words} -body {
list [lcwp foo bar] \
[lcwp {foo bar} {baz qux}]
} -result {{} {}}
tcltest::test common-word-prefix-1.4 {words sharing a prefix} -body {
list [lcwp foo food] \
[lcwp fool food] \
[lcwp {hello world alpha} {hello world aleph}]
} -result {{} {} {hello world}}
tcltest::test common-word-prefix-1.5 {different length} -body {
list [lcwp {foo bar baz} foo] \
[lcwp {foo bar baz} {foo bar}] \
[lcwp {foo bar baz} {foo bar }] \
[lcwp {foo bar } {foo bar baz}] \
[lcwp {foo bar} {foo bar baz}] \
[lcwp foo {foo bar baz}]
} -result {foo {foo bar} {foo bar} {foo bar} {foo bar} foo}
tcltest::test common-word-prefix-1.6 includeTailSep -body {
list [lcwp {hello world 1} {hello world 2} { } 0] \
[lcwp {hello world 1} {hello world 2} { } 1] \
[lcwp hello-world-1 hello-world-2 - 0] \
[lcwp hello-world-1 hello-world-2 - 1]
} -result {{hello world} {hello world } hello-world hello-world-}
tcltest::test common-word-prefix-1.7 whitespace-1 -body {
list [lcwp {foo bar 1} {foo bar } { } 0] \
[lcwp {foo bar 1} {foo bar } { } 1] \
[lcwp { foo bar 1} { foo bar } { } 0] \
[lcwp { foo bar 1} { foo bar } { } 1]
} -result {{foo bar} {foo bar } { foo bar} { foo bar }}
tcltest::test replace-prefix-1.1 {default use} -body {
list [replace-prefix {} {}] \
[replace-prefix foo foo] \
[replace-prefix {foo bar} {foo bar baz}]
} -result {{} { } { baz}}
tcltest::test replace-prefix-1.2 pattern -body {
list [replace-prefix {} {} 12345] \
[replace-prefix foo foo 12345] \
[replace-prefix {foo bar } {foo bar baz} 12345]
} -result {{} 123 12345123baz}
# Exit with a nonzero status if there are failed tests.
set failed [expr {$tcltest::numTests(Failed) > 0}]
tcltest::cleanupTests
if {$failed} {
exit 1
}
}
Use example edit
Code
source lcwp.tcl
proc log text {
set message "[clock format [clock seconds] -gmt 1] $text"
set prefix [::lcwp::longest-common-word-prefix $::prevLogMessage \
$message \
{\s} \
1]
puts stderr [::lcwp::replace-prefix $prefix $message $::logPlaceholder]
set ::prevLogMessage $message
}
foreach logPlaceholder {{ } -=} {
set prevLogMessage {}
log "frobnicating file /foo/bar"
log "frobnicating file /foo/baz"
log "frobnicating file /foo/qux"
}
Output
Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar
/foo/baz
/foo/qux
Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/baz
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/qux