{x1,y1), (x2,y2), ..., (xn, yn)such that x1 < x2 < ... < xn
y1 < y2 < ... < ynandL1[x1] = L2[y1] ; L1[x2] = L2[y2] ; ... L1[xn] = L2[yn]Solving this problem is critical to the implementation of diff in Tcl.Here is one Tcl implementation of the "folklore algorithm."Note that Hunt and McIlroy have published a much better algorithm (used in the Unix 'diff' command) that is implemented at diff in Tcl. This page is here to hold some of the historical discussion.
}
namespace eval list {
}
namespace eval list::longestCommonSubsequence {
namespace export compare
}
# Internal procedure that indexes into the 2-dimensional array t,
# which corresponds to the sequence y, looking for the (i,j)th element.
proc list::longestCommonSubsequence::Index { t y i j } {
set indx [expr { ([llength $y] + 1) * ($i + 1) + ($j + 1) }]
return [lindex $t $indx]
}
# Internal procedure that implements Levenshtein to derive the longest
# common subsequence of two lists x and y.
proc list::longestCommonSubsequence::ComputeLCS { x y } {
set t [list]
for { set i -1 } { $i < [llength $y] } { incr i } {
lappend t 0
}
for { set i 0 } { $i < [llength $x] } { incr i } {
lappend t 0
for { set j 0 } { $j < [llength $y] } { incr j } {
if { [string equal [lindex $x $i] [lindex $y $j]] } {
set lastT [Index $t $y [expr { $i - 1 }] [expr {$j - 1}]]
set nextT [expr {$lastT + 1}]
} else {
set lastT1 [Index $t $y $i [expr { $j - 1 }]]
set lastT2 [Index $t $y [expr { $i - 1 }] $j]
if { $lastT1 > $lastT2 } {
set nextT $lastT1
} else {
set nextT $lastT2
}
}
lappend t $nextT
}
}
return $t
}
# Internal procedure that traces through the array built by ComputeLCS
# and finds a longest common subsequence -- specifically, the one that
# is lexicographically first.
proc list::longestCommonSubsequence::TraceLCS { t x y } {
set trace {}
set i [expr { [llength $x] - 1 }]
set j [expr { [llength $y] - 1 }]
set k [expr { [Index $t $y $i $j] - 1 }]
while { $i >= 0 && $j >= 0 } {
set im1 [expr { $i - 1 }]
set jm1 [expr { $j - 1 }]
if { [Index $t $y $i $j] == [Index $t $y $im1 $jm1] + 1
&& [string equal [lindex $x $i] [lindex $y $j]] } {
lappend trace xy [list $i $j]
set i $im1
set j $jm1
} elseif { [Index $t $y $im1 $j] > [Index $t $y $i $jm1] } {
lappend trace x $i
set i $im1
} else {
lappend trace y $j
set j $jm1
}
}
while { $i >= 0 } {
lappend trace x $i
incr i -1
}
while { $j >= 0 } {
lappend trace y $j
incr j -1
}
return $trace
}
# list::longestCommonSubsequence::compare --
#
# Compare two lists for the longest common subsequence
#
# Arguments:
# x, y - Two lists of strings to compare
# matched - Callback to execute on matched elements, see below
# unmatchedX - Callback to execute on unmatched elements from the
# first list, see below.
# unmatchedY - Callback to execute on unmatched elements from the
# second list, see below.
#
# Results:
# None.
#
# Side effects:
# Whatever the callbacks do.
#
# The 'compare' procedure compares the two lists of strings, x and y.
# It finds a longest common subsequence between the two. It then walks
# the lists in order and makes the following callbacks:
#
# For an element that is common to both lists, it appends the index in
# the first list, the index in the second list, and the string value of
# the element as three parameters to the 'matched' callback, and executes
# the result.
#
# For an element that is in the first list but not the second, it appends
# the index in the first list and the string value of the element as two
# parameters to the 'unmatchedX' callback and executes the result.
#
# For an element that is in the second list but not the first, it appends
# the index in the second list and the string value of the element as two
# parameters to the 'unmatchedY' callback and executes the result.
proc list::longestCommonSubsequence::compare { x y
matched
unmatchedX unmatchedY } {
set t [ComputeLCS $x $y]
set trace [TraceLCS $t $x $y]
set i [llength $trace]
while { $i > 0 } {
set indices [lindex $trace [incr i -1]]
set type [lindex $trace [incr i -1]]
switch -exact -- $type {
xy {
set c $matched
eval lappend c $indices
lappend c [lindex $x [lindex $indices 0]]
uplevel 1 $c
}
x {
set c $unmatchedX
lappend c $indices
lappend c [lindex $x $indices]
uplevel 1 $c
}
y {
set c $unmatchedY
lappend c $indices
lappend c [lindex $y $indices]
uplevel 1 $c
}
}
}
return
}# With this code in hand, we can now write the external parts of a diff command. The various options of diff alter how it displays the comparison, but not its fundamental operation. Here's an external wrapper that gives very simple-minded output. namespace import list::longestCommonSubsequence::compare
proc umx { index value } {
variable lastx
variable xlines
append xlines "< " $value \n
set lastx $index
}
proc umy { index value } {
variable lasty
variable ylines
append ylines "> " $value \n
set lasty $index
}
proc matched { index1 index2 value } {
variable lastx
variable lasty
variable xlines
variable ylines
if { [info exists lastx] && [info exists lasty] } {
puts "[expr { $lastx + 1 }],${index1}c[expr {$lasty + 1 }],${index2}"
puts -nonewline $xlines
puts "----"
puts -nonewline $ylines
} elseif { [info exists lastx] } {
puts "[expr { $lastx + 1 }],${index1}d${index2}"
puts -nonewline $xlines
} elseif { [info exists lasty] } {
puts "${index1}a[expr {$lasty + 1 }],${index2}"
puts -nonewline $ylines
}
catch { unset lastx }
catch { unset xlines }
catch { unset lasty }
catch { unset ylines }
}
# Really, we should read the first file in like this:
# set f0 [open [lindex $argv 0] r]
# set x [split [read $f0] \n]
# close $f0
# But I'll just provide some sample lines:
set x {}
for { set i 0 } { $i < 10 } { incr i } {
lappend x a r a d e d a b r a x
}
# The second file, too, should be read in like this:
# set f1 [open [lindex $argv 1] r]
# set y [split [read $f1] \n]
# close $f1
# Once again, I'll just do some sample lines.
set y {}
for { set i 0 } { $i < 10 } { incr i } {
lappend y a b r a c a d a b r a
}
compare $x $y matched umx umy
matched [llength $x] [llength $y] {}if 0 {}

