Author
Jackson McCann.
#=======================================================================
#
# Package: DGA.tcl
#
# Purpose: A tcl package that implements a number of algorithms on top of
# the basic Tcl ::struct::graph package.
package provide DGA 1.0
#=======================================================================
# NAMESPACE ::DGA::
namespace eval DGA {
#===================================================================
# Export the functions that the user should be calling
namespace export shortest_path min_span_tree predecessor_list
namespace export pl_node pl_arc pl_dist
namespace export topological_sort outonly_nodes is_DAG
# The graph package is needed for obvious reasons so we include
# the struct package that contains it
package require struct
# Define any variables used by the package
variable topo_sort {}
#-------------------------------------------------------------------
# update_node
#
# g - The graph
# n - The node
# u - Used start 0 Unused, 1 Found, 2 Completely used
# d - The distance from the starting node
# p - The predecessor node
proc update_node { g n d p u } {
# Set this nodes predecessor
$g node set $n -key predecessor $p
# The distance to the source node
$g node set $n -key distance $d
# Set the used flag
if { [expr $u > -1] } {
$g node set $n -key "used" $u
}
}
#-------------------------------------------------------------------
# init_graph
# Algorithms such as Dijkstra'a and Prim's need to flag the nodes
# and arcs of the graph and to record additional information
# against the nodes and arcs. This function initializes these
# values for a graph
proc init_graph { g } {
# Setup. Update each node with the keyed values that will hold
# the following information for each node:
# The distance from the source to the node - infinite
# The predecessor of the node - nothing
# The nodes status 0 - Not found, 1 - Found, 2 - Finished
foreach node [$g nodes] {
# This node doesn't have a predecessor and the distance to the
# source node is undefined and the node is unused
update_node $g $node -1 {} 0
}
# Initialize the arcs in the graph. Make sure they all
# have a weight associated with them. Give a default
# weight of 1 if no weight is found. Set a flag to show
# if the arc has been used in the algorithm
foreach arc [$g arcs] {
$g arc set $arc -key used 0
if { [catch {
$g arc get $arc -key weight
}] } {
$g arc set $arc -key weight 1.0
}
}
}
#-------------------------------------------------------------------
# min_arc
# Within the graph zero, one or many nodes will be in state 1,
# i.e., the node has been found and still has outward arcs that
# have not been used. Find the arc that has the minimum weight.
# As a side effect if a node has no unused outward arcs left
# then mark the node as used. If no arc can be found then
# return an empty list.
# g - The graph to search
proc min_arc { g } {
# Initialise the working variables used by this proc
# The arc from the source node
set tArc {}
# The weight of the arc
set tArcMin -1
# Find the shortest arc out of the nodes that have been found.
# Use this arc if it provides a shorter route to the target or
# if it discovers a new target node
foreach node [$g nodes -key used -value 1] {
# The hasArc counter is used to determine when a node has
# no unused arcs left.
set hasArc 0
# Look at each arc that goes out of the current node
foreach arc [$g arcs -out $node -key used -value 0] {
# Set hasArc as this node has at least one unused arc
set hasArc 1
# Get the weight of this arc. If it is less than the
# current value of tArcMin then it will be used
set arcWeight [$g arc get $arc -key weight]
if { [expr $tArcMin == -1] || [expr $tArcMin > $arcWeight] } {
# Store the source node and arc names
set tArc $arc
set tArcMin $arcWeight
}
}
# If a node doesn't have any unused arcs left then
# mark the node as completely used
if { [expr $hasArc == 0] } {
$g node set $node -key used 2
}
}
# Now mark this arc as having been used
if { $tArc != {} } {
$g arc set $tArc -key used 1
}
return $tArc
}
#===================================================================
# predecessor_list
# After an algorithm such as:
# shortest_path
# min_span_tree
# has been run on the graph it will contain a predecessor list,
# that is for each node that was found an arc will be held under
# the key 'predecessor' that defines the arc and node that this
# node was reached from. This function returns this list as a
# Tcl list with a structure. The pl_ functions can be used to
# access this list
# g - The graph
proc predecessor_list { g } {
set idxList {}
set detList {}
foreach node [$g nodes] {
# The distance node is from the source
set nDist [$g node get $node -key distance]
# The arc that links node to its predecessor
set pArc [$g node get $node -key predecessor]
# The node that precedes this node, if there is one
if { $pArc == {} } {
set pNode {}
} else {
set pNode [$g arc source $pArc]
}
lappend idxList $node
lappend detList [list $node $pNode $pArc $nDist]
}
return [list [lsort $idxList] [lsort $detList]]
}
#===================================================================
# pl_node
# pl_arc
# pl_dist
# Three helper functions that make it easier to get
# details about a node's predecessor.
# pl - The predecessor list as returned by predecessor_list
# n - The node
proc pl_node { pl n } {
set idx [lsearch -sorted [lindex $pl 0] $n]
return [lindex [lindex [lindex $pl 1] $idx] 1]
}
proc pl_arc { pl n } {
set idx [lsearch -sorted [lindex $pl 0] $n]
return [lindex [lindex [lindex $pl 1] $idx] 2]
}
proc pl_dist { pl n } {
set idx [lsearch -sorted [lindex $pl 0] $n]
return [lindex [lindex [lindex $pl 1] $idx] 3]
}
#===================================================================
# shortest_path
# Using Dijkstra's algorithm find a shortest path from the node
# n to every other node that is reachable from n.
#
# g - The graph to be processed
# n - The starting node
proc shortest_path { g n } {
# Setup. Update the nodes and arcs with the required
# flags etc.
init_graph $g
# Mark the starting node as being used and as having a distance
# of zero from itself.
update_node $g $n 0.0 {} 1
# Now start looking for nodes we don't know about by exploring out
# from the starting node.
while {1} {
# Find the arc with minimum length that goes from
# a found node
set tArc [min_arc $g]
# Test for the end of the graph, we have no unused nodes
# or arcs left. This does not imply that all of the nodes
# in the graph have been discovered, some nodes may not be
# reachable from the given starting node. These can be
# identified as they have a distance of -1.
if { $tArc == {} } { break }
# Get the distance from the first node of
# this node
set sDist [$g node get [$g arc source $tArc] -key distance]
# Get the name of the target node
set tNode [$g arc target $tArc]
# Get the weight of the arc joining the source and
# target nodes.
set weight [$g arc get $tArc -key weight]
# Get the distance from the source node of the
# target node. This will be -1 if we have never
# seen this node before.
set tDist [$g node get $tNode -key distance]
# Does this arc build a short path to the target node?
if { [expr $tDist == -1] } {
# Yes - it must do, this is the first time
# the target node has been encountered.
# Save the distance from the source node and the
# arc that leads to the predecessor and mark the
# node as found, it will now be considered next time
# we look for the shortest arc.
update_node $g $tNode [expr $sDist + $weight] $tArc 1
} else {
# Does this arc provide a cheaper way to get to
# the already discovered node? Calculate the distance
# based on the new arc's weight and compare it with the stored
# distance for the node.
set ttDist [expr $sDist + $weight]
if { [expr $ttDist < $tDist] } {
# Update the node to use the new arc as its
# predecessor
update_node $g $tNode $ttDist $tArc -1
}
}
}
}
#===================================================================
# min_span_tree
# Using Prims's algorithm find a minimum spanning tree starting
# at the node n and reaching to every other node that is reachable
# from n.
#
# g - The graph to be processed
# n - The starting node
proc min_span_tree { g n } {
# Setup. Update the nodes and arcs with the required
# flags etc.
init_graph $g
# Mark the starting node as being used and as having a distance
# of zero from itself.
update_node $g $n 0.0 {} 1
# Now start looking for nodes we don't know about by exploring out
# from the starting node.
while {1} {
# Find the arc with minimum length that goes from
# a found node
set tArc [min_arc $g]
# Test for the end of the graph, we have no unused nodes
# or arcs left. This does not imply that all of the nodes
# in the graph have been discovered, some nodes may not be
# reachable from the given starting node. These can be
# identified as they have a distance of -1.
if { $tArc == {} } { break }
# Get the distance from the first node of
# this node
set sDist [$g node get [$g arc source $tArc] -key distance]
# Get the name of the target node.
set tNode [$g arc target $tArc]
# Get the weight of the arc joining the source and
# target nodes.
set weight [$g arc get $tArc -key weight]
# Get the distance from the source node of the
# target node. This will be -1 if we have never
# seen this node before.
set tDist [$g node get $tNode -key distance]
# Does this arc discover a new target node?
if { [expr $tDist == -1] } {
# Yes it does
# Save the distance from the source node and the
# arc that leads to the predecessor and mark the
# node as found. It will now be considered next time
# we look for the shortest arc.
update_node $g $tNode [expr $sDist + $weight] $tArc 1
}
}
}
#===================================================================
# outonly_nodes
# An 'out only' node is one that has no inward arcs. That is the
# node can have no predecessor as no no arc has it as its target
# This function finds all such nodes with the graph.
#
# g - The graph
proc outonly_nodes { g } {
set result {}
foreach node [$g nodes] {
if { [$g node degree -in $node] == 0 } {
lappend result $node
}
}
return $result
}
#-------------------------------------------------------------------
# make_toplevel_node
# When checking for DAGness and/or performing a topological sort
# a single top-level node to start the process from is required.
# This function transforms the graph so that it has that single
# top node. The name of this new node is returned.
proc make_toplevel_node { g } {
# Find toplevel node(s)
set oonList [outonly_nodes $g]
# If there are no out-only nodes then we can't sort the
# graph as it must have at least one cycle.
if { [llength $oonList] == 0 } {
error "$g is not a DAG. Graph contains one or more cycles"
}
# If there are more than one out-only nodes a new parent node that
# provides a single starting point for the sort must be created above
# these nodes. To make life simple we do this for one node as well.
set startNode [$g node insert]
# Connect the startNode to the original out-only nodes
foreach node $oonList {
$g arc insert $startNode $node
}
return $startNode
}
#-------------------------------------------------------------------
# topo_node
# This routine is called by the topological_sort when a node has
# been completely explored. The routine has two functions.
proc topo_node { dir g n } {
variable topo_sort
lappend topo_sort $n
}
#===================================================================
# topological_sort
# Perform a topological sort on the graph. This will return a
# list of nodes giving an ordering on the nodes such that all
# arcs go from left to right. Only an acyclic graph can have a
# topological sort, because a directed cycle must eventually
# return home to the source of the cycle. However, every DAG
# has at least one topological sort.
#
# g - The graph to sort
proc topological_sort { g } {
variable topo_sort {}
# Make a toplevel (parentless) node to start the sort from
set startNode [make_toplevel_node $g]
# Setup. Update the nodes and arcs with the required
# flags etc.
init_graph $g
# Walk the graph, nodes are added to the topo_sort list
# in the order that they are marked as completely explored.
# The topological sort is the reverse of this order.
# NB - This code won't detect any cycles in the data so it's
# up to the user to determine that the graph is a DAG
# The is_DAG function can be used for this
$g walk $startNode -order post -type dfs -command topo_node
# Remove the start node we created from the graph, all of the
# arcs will be removed as well.
$g node delete $startNode
# Reverse the list and discard the node that
# was added to the graph by this routine.
set result {}
for { set i [expr [llength $topo_sort] - 2] } {$i >= 0} {incr i -1} {
lappend result [lindex $topo_sort $i]
}
return $result
}
#===================================================================
# dag_dfs
# Recursive Depth First Search of the graph. If a node is
# discovered that has the used flag set to 1, then it has been
# found by a cycle through one of its children and the graph
# is not a DAG. A stack-based implementation of this function
# would be better. It would not risk blowing up if the recursion
# level got too big.
#
# g - The graph
# n - The next node to check
proc dag_dfs { g n } {
# We have found a new node, mark it as such.
$g node set $n -key used 1
# Process each of the arcs out of the node.
foreach arc [$g arcs -out $n] {
# Find out the details of the target node.
set tNode [$g arc target $arc]
set used [$g node get $tNode -key used]
# If the node has been discovered but not completed then
# this is a back edge and the graph contains a cycle.
if { $used == 1 } {
error "$g is not a DAG. Graph contains one or more cycles"
}
dag_dfs $g $tNode
}
# We have completely used this node.
$g node set $n -key used 2
}
#===================================================================
# is_DAG
# Determine if the graph is a DAG, that is that it contains no
# cycles. If it isn't a DAG then an error is thrown!
#
# g - The graph
proc is_DAG { g } {
# Make a toplevel (parentless) node to start the sort from
set startNode [make_toplevel_node $g]
# Setup. Update the nodes and arcs with the required
# flags etc.
init_graph $g
# Use the simple recursive definition of depth-first search
# to search through the tree for back edges.
set result [dag_dfs $g $startNode]
# Remove the start node we created from the graph. All of the
# arcs will be removed as well.
$g node delete $startNode
return $result
}
}