Updated 2011-05-12 08:50:13 by RLE

Richard Suchenwirth - The experiments in Syntax parsing in Tcl ended with a treelist representation of the parse result. After due delay, but as announced, here we continue: how to display a tree structure on a canvas. Plus, some more graph theory helpers...

Treelists (a label, possibly followed by the sons, which are treelists again) are an extremely compact representation of a tree. Consider that
 {A {B C D} {E F G}}

is equivalent to the tree
     A
    / \
   B   E
  /\   /\
 C  D F  G

But navigating and working on a treelist can be difficult, so I recast it into an array-based graph representation. A graph array in this implementation has elements of the following types:
 g(nodes)   {...}     -- list of all nodes, in creation order
 g($n) name ?attr...? -- node descriptor, $n being a unique ID
 g($n1,$n2) ?attr...? -- edge descriptor, from node $n1 to $n2

Here's some elementary graph procedures:
 proc graphInit _g {upvar 1 $_g g; catch {unset g}; set g(nodes) {}}
 proc addNode {_g node args} {
    upvar 1 $_g g
    set id [llength $g(nodes)]
    set g($id) [concat $node $args]
    lappend g(nodes) $id
    set id
 }
 proc addEdge {_g from to args} {upvar 1 $_g g; set g($from,$to) $args}
 proc isTerminal {_g id} {
    upvar 1 $_g g
    expr {[array names g $id,*]==""} ;# no edges leading out
 }
 proc father {_g id} {
    upvar 1 $_g g
    regsub -all ,$id [array names g *,$id] "" res ;# could be >1
    set res
 }
 proc sons {_g id} {
    upvar 1 $_g g
    regsub -all $id, [array names g $id,*] "" res
    set res
 }
 proc edges _g {upvar 1 $_g g; array names g *,*}
 proc nodes _g {upvar 1 $_g g; set g(nodes)}
# This tiny recursive routine loads a graph from a treelist: 
 proc treelist2graph {L _g} {
    upvar 1 $_g g
    set from [addNode g [lindex $L 0]]
    foreach i [lrange $L 1 end] {
        set to [treelist2graph $i g]
        addEdge g $from $to
    }
    set from
 }

if 0 {Conversely, this turns a tree graph into a treelist (but regrettably shuffles the order of the terminals):}
 proc graph2treelist {_g {from ""}} {
    upvar 1 $_g g
    if {$from==""} {
        if {![isTree g]} {error "$_g is not a tree"}
        set from [initials g] ;# must be a single node now
    }
    set res $g($from)
    foreach i [sons g $from] {
        lappend res [graph2treelist g $i]
    }
    set res
 }
 proc isTree _g {
    upvar 1 $_g g
    if {[llength [set root [initials g]]]!=1} {return 0}
    foreach i [terminals g] {set t($i) 0} ;# init tally
    if [catch {pathes g $root} pathes] {return 0}
    foreach i $pathes {incr t([lindex $i end])}
    set tallies {}
    foreach {- i} [array get t] {lappend tallies $i}
    expr {[min $tallies]==1 && [max $tallies]==1}
 }

if 0 {The only thing that took some more effort was laying out the tree, i.e. determine x/y coordinates for each node (edges rely on their respective nodes). Terminal nodes (leaves) are arranged at bottom. So we work bottom up, which is easiest by just reverting the nodes list. We save the results in (xy:$n) keys in the graph array.}
 proc layout _g {
    upvar 1 $_g g
    set xt 0; set yt 0; set dx -60; set dy -30
    foreach id [lrevert [nodes g]] {
        if [isTerminal g $id] {
            set g(xy:$id) [list $xt $yt]
            incr xt $dx
        } else {
            set xs {}; set ys {}
            foreach i [sons g $id] {
                foreach {x y} $g(xy:$i) break
                lappend xs $x
                lappend ys $y
            }
            set x [expr {([max $xs]+[min $xs])/2}] ;# x:center between sons
            set y [expr {[min $ys]+$dy}]  ;# y: one layer above highest son
            set g(xy:$id) [list $x $y]
        }
    }
 }

# Finally, displaying the thing on a canvas is easy again:
 proc draw {_g w} {
    upvar 1 $_g g
    foreach i [edges g] {
        foreach {from to} [split $i ,] break
        eval $w create line $g(xy:$from) $g(xy:$to)
    }
    foreach i [nodes g] {
        set id [eval $w create text $g(xy:$i) -text [list $g($i)]] 
        eval $w create rect [$w bbox $id] -fill white -outline white
        $w raise $id
    }
 }

if 0 {Besides linguistic purposes, trees can be found in other domains. Here's for instance how to pack a widget tree into a treelist:}
 proc widgets2treelist w {
    set res [lindex [split $w .] end]
    foreach i [winfo children $w] {
        lappend res [widgets2treelist $i]
    }
    set res
 }

if 0 { More generalized directed graphs are characterized by their nodeset and edgeset. Even the nodeset can be deduced from the edgeset (if we exclude singleton nodes), so we can specify a graph just by its edgelist:
 {A,B B,C C,D D,B D,E}

This is a cycle B-C-D with one edge each coming in and out. Here's how to turn an edgelist into a graph:}
 proc edgelist2graph {_g edgelist} {
    upvar 1 $_g g
    graphInit g
    foreach i $edgelist {
        foreach j [split $i ,] {ladd g(nodes) $j; set g($j) $j}
        set g($i) ""
    }
 }
 #conversely, it's even simpler...
 proc graph2edgelist _g {upvar 1 $_g g; lsort [array names g *,*]}
 
 proc pathes {_g from} {
    upvar 1 $_g g
    set pathes {}
    set todo $from
    while 1 {
        set newpathes {}
        foreach i $todo {
            foreach j [sons g [lindex $i end]] {
                if {[lsearch $i $j]>=0} {error "cycle: $i $j"}
                if {[isTerminal g $j]} {
                    lappend pathes [concat $i $j]
                } else {
                    lappend newpathes [concat $i $j]
                }
            }
        }
        if {$newpathes==""} break ;# nothing new, so we're done
        set todo $newpathes
    }
    set pathes
 }
 proc isCyclic _g {
    upvar 1 $_g g
    if {[set initials [initials g]]==""} {return 1}
    foreach i $initials {
        if [catch {pathes g $i}] {return 1}
    }
    return 0
 }
 proc initials _g {
    upvar 1 $_g g
    set res  {}
    foreach i [nodes g] {
        if {[father g $i]==""} {lappend res $i}
    }
    set res
 }
 proc terminals _g {
    upvar 1 $_g g
    set res  {}
    foreach i [nodes g] {
        if {[sons g $i]==""} {lappend res $i}
    }
    set res
 }
 ################### little helpers
 proc max L {lindex [lsort -integer $L] end}
 proc min L {lindex [lsort -integer $L] 0}
 proc ladd {_L x} {
    upvar 1 $_L L
    if {[lsearch $L $x]<0} {lappend L $x}
 }
 proc lrevert L {
    for {set res {}; set i [llength $L]} {$i>0} {#see loop} {
        lappend res [lindex $L [incr i -1]]
    }
    set res
 }
 ## testing... requires the code from "Syntax parsing with Tcl"
 proc test {{s "the old man feeds the lazy dog"}} {
    source sep_synt.tcl
    graphInit g
    set tl [parse1 $s $rules]
    treelist2graph [lindex $tl 0] g
    layout g
    pack [canvas .c]
    draw g .c
    foreach {x y} [.c bbox all] break
    .c move all [expr 5-$x] [expr 5-$y]
    foreach {- - x1 y1} [.c bbox all] break
    .c config -width [expr $x1+10] -height [expr $y1+10]
 }
 test

See also: