Updated 2011-05-12 08:47:28 by RLE

Author Jackson McCann.

Additional work is required on this package to make it fully compliant with the Graphviz file format. The main problem is that I only understand the subset that I use and I haven't had time to revisit this.

escargo 30 Jan 2007 - I note that this code requires struct::graph v1, and not the current version. (struct::graph v1 required Tcl 8.2; the version of struct::graph for v1 in tcllib says 1.2.1.
 #=======================================================================
 #
 # Package: ASDOT.tcl
 #
 # Purpose: A tcl package to allow the reading and writing of graphs 
 # using the the Graphviz  format.  
 # See http://www.research.att.com/sw/tools/graphviz/ for details of the 
 # Graphviz tools

 # Function Name  Description
 # -------------  -------------------------------------------------------
 # read_dot       Read and parse a .dot file.  Build a graph from the description
 # write_dot      Create a .dot file from a Tcl ::struct::graph object

 # History
 # Version  Created By  Description
 # -------  ----------  -------------------------------------------------
 # 1.0      J McCann    Initial version

 package provide ASDOT 1.0

 #=======================================================================
 # NAMESPACE ::ASDOT::

 namespace eval ASDOT {

    #===================================================================
    # Export the functions that the user should be calling
    namespace export read_dot write_dot

    # We use the tcLex package (1.2 was the latest at the time of 
    # writing) to parse the .dot package
    package require tcLex

    # The graph package is needed for obvious reasons so we include
    # the struct package that contains it
    package require struct

    # Define the namespace variables
    variable n_graph

    # List of all the possible attributes for a node
    variable n_nodeAttr { bottomlabel color comment distortion \
                              fillcolor fixedsize fontcolor \
                              fontname fontsize group height \
                              label layer orientation peripheries \
                              regular shape shapefile sides skew \
                              style toplabel URL width z }

    # List of all possible attributes for an edge (arc)
    variable n_edgeAttr { arrowhead arrowsize arrowtail color comment \
                              constraint decorate dir fontcolor \
                              fontname fontsize headlabel headport \
                              headURL label labelangle labeldistance \
                              labelfloat labelfontcolor labelfontname \
                              labelfontsize layer lhead ltail minlen \
                              samehead sametail style taillabel \
                              tailport tailURL weight }

    # List of all possible attributes for a graph
    variable n_graphAttr { bgcolor center clusterrank color comment \
                               compound concentrate fillcolor \
                               fontcolor fontname fontpath fontsize \
                               label labeljust labelloc layers margin \
                               mclimit nodesep nslimit nslimit1 ordering \
                               orientation page pagedir quantum rank \
                               rankdir ranksep ratio remincross rotate \
                               samplepoints searchsize size style URL }  
    
    # Create the lexer that will parse the graph
    lexer create ASDOT::dot \
        -line \
        -ec {graphHeader graphDetail attrList nodeSpecAttrList edgeSpec edgeSpecAttrList subGraph} \
        -prescript {
            # prescript - this is run before the lexer starts to work
            
            # subgraph contains a stack that is used to track the (possibly nested)
            # subgraphs within the graph specifications
            ::struct::stack ASDOT::subgraph
        } \
        -postscript {
            # postscript - this is run after the lexer has finished work

            # Destroy the stack now we have finished with it
            ASDOT::subgraph destroy
        } \
        {
            {initial} "digraph|static digraph" {graphType} {
                # The .dot file needs to start with a directed graph specifier.
                # Only directed graphs are handled by this parser
                dot begin graphHeader
            }

            {graphHeader} "\"(.*)\"" {x graphName} -
            {graphHeader} "[a-zA-Z][a-zA-Z0-9_]*" {graphName} {
                # Get the graph name from the .dot file.  If the graphname
                # was enclosed in quotes then it may contain escaped quotes, the
                # call to regsub removes these
                set graphName [regsub -all {\\\"} $graphName "\""]

                # Use the .dot graphname as the name of the graph command
                # created by the graph library package
                set ASDOT::n_graph [::struct::graph $graphName]
            }

            {graphHeader} "\{" {graphName} {
                # Match the opening brace for the graph details in the .dot
                # file.  Start the graphDetail processing when it has been 
                # located.
                dot begin graphDetail
            }

            {graphDetail} "([a-zA-Z0-9_]+)[ \t]*[=][ \t]*[\"]*([^\]\"]+)[\"]*[;,]*" {x name value} {
                # Picks up settings at the graph level.  Store these in the graph
                set value [string trim [regsub -all {\\\"} $value "\""]]
                $ASDOT::n_graph set -key $name $value
            }

            {graphDetail} "(graph|node|edge)[ \t]*[\[]" {x attrType} {
                # A graph can contain graph, node and edge sections that
                # define general properties.  This condition finds the 
                # start of these sections.  They are processed by the
                # arrtList statements.
                dot begin attrList
            }

            {graphDetail subGraph} "subgraph[ \t]*([a-zA-Z0-9_]+)[ \t]*\{" {x subGraphName} -
            {graphDetail subGraph} "subgraph[ \t]*\"(.*)\"[ \t]*\{" {x subGraphName} {
                # Store the subgraph name on the stack.  This information gets used later
                # when arcs are being created.  The stack gets stored on the arc when it is
                # created
                set subGraphName [regsub -all {\\\"} $subGraphName "\""]
                ASDOT::subgraph push $subGraphName
                dot begin subGraph
            }

            {graphDetail subGraph} "\"(.*)\"[ \t]*[\[;]" {x nodeName} - 
            {graphDetail subGraph} "([a-zA-Z0-9_]+)[ \t]*[\[;]" {x nodeName} {

                # Convert any escaped quotes to normal quotes
                set nodeName [regsub -all {\\\"} $nodeName "\""]

                # Now check for an edge spec that looks like a node spec because
                # of its format "333" -> "444" [ for example
                if { ![        regexp {.*[\"][ \t]*->[ \t]*[\"].*} $nodeName ] &&
                     ![        regexp {.*[\"]:.*[ \t]*->[ \t]*[\"].*} $nodeName ] } {

                    # This is a node definition - so create the new node if
                    # ir dosn't exist
                    set newNode $nodeName
                    if { ![$ASDOT::n_graph node exists $nodeName] } {
                        $ASDOT::n_graph node insert $nodeName
                    }
                    
                    # Now start looking at the node attribute list
                    dot begin nodeSpecAttrList
                } else {
                    # Reject this match and try again
                    dot reject
                }
            }

            {graphDetail subGraph} "\"(.*)\":([a-zA-Z0-9_]+)[ \t]*->" {x nodeName fromNodeRecord} - 
            {graphDetail subGraph} "([a-zA-Z0-9_]+):([a-zA-Z0-9_]+)[ \t]*->" {x nodeName fromNodeRecord} {
                # This marks the start of a new edge specification (with a record).
                # First save the name of the from/source node and the record
                set nodeName [regsub -all {\\\"} $nodeName "\""]
                set fromNode $nodeName

                # Now check for an edge spec that looks like a node spec because
                # of its format "333" -> "444" [ for example
                if { ![        regexp {.*[\"][ \t]*->[ \t]*[\"].*} $nodeName ] } {

                    # Initialize variables that will hold details of the edge
                    set toNode {}
                    set toNodeRecord {}

                    # Start processing the edge spec
                    dot begin edgeSpec
                } else {
                    # Reject this match and try again
                    dot reject
                }
            }

            {graphDetail subGraph} "\"(.*)\"[ \t]*->" {x nodeName} - 
            {graphDetail subGraph} "([a-zA-Z0-9_]+)[ \t]*->" {x nodeName} {
                # This marks the start of a new edge specification. First save
                # the name of the from/source node
                set nodeName [regsub -all {\\\"} $nodeName "\""]
                set fromNode $nodeName

                # Initialize variables that will hold details of the edge
                set toNode {}
                set toNodeRecord {}
                set fromNodeRecord {}
                
                # Start processing the edge spec
                dot begin edgeSpec
            }


            {graphDetail} "\}" {} {

                # Now go back to the graphHeader condition
                dot end

                # Now go back to the initial condition
                dot end
            }

            {attrList} "([a-zA-Z0-9_]+)[ \t]*[=][ \t]*[\"]*([^\],\"]+)[\"]*[,]*" {x name value} {
                # picks up attributes in the node|edge|graph sections and store these
                # in the graph
                set value [string trim [regsub -all {\\\"} $value "\""]]
                $ASDOT::n_graph set -key "$attrType,$name" $value
            }
            {attrList} "[\]]" {} {
                # End of the attribute list
                dot end
            }

            {nodeSpecAttrList} "([a-zA-Z0-9_]+)[ \t]*[=][ \t]*[\"]*([^\],\"]+)[\"]*[,]*" {x name value} {
                # When a node is explicitly created it can have attributes set.  Associate the
                # attributes with the node
                set value [string trim [regsub -all {\\\"} $value "\""]]
                $ASDOT::n_graph node set $newNode -key $name $value
            }
            {nodeSpecAttrList} "[\]]" {} {
                # End of the node specification
                dot end
            }

            {edgeSpec} ":([a-zA-Z][a-zA-Z0-9_]*)" {x nodeRecordName} {
                
                # Get the (optional) record identifier for the start/end
                # of the edge
                if { $toNode == {} } {
                    set fromNodeRecord $nodeRecordName
                } else {
                    # The arc will have been created by now so add the
                    # toNodeRecord to it
                    set toNodeRecord $nodeRecordName
                    $ASDOT::n_graph arc set $newArc -key toRecord $toNodeRecord
                }
            }

            {edgeSpec} "\"(.*)\"" {x nodeName} - 
            {edgeSpec} "[a-zA-Z0-9_]+" {nodeName} {

                set nodeName [regsub -all {\\\"} $nodeName "\""]
                set toNode $nodeName

                # The toNode and fromNode may not yet exist.  Check for them
                # and create them if necessary
                if { ![$ASDOT::n_graph node exists $fromNode] } {
                    $ASDOT::n_graph node insert $fromNode
                }

                # Now for the to node
                if { ![$ASDOT::n_graph node exists $toNode] } {
                    $ASDOT::n_graph node insert $toNode
                }

                # Create the new arc
                set newArc [$ASDOT::n_graph arc insert $fromNode $toNode]

                # If we are in a subgraph then add the subgraph stack to the
                # arc using the key subgraph
                if { [ASDOT::subgraph size] > 0 } {
                    $ASDOT::n_graph arc set $newArc -key "subgraph" \
                        [ASDOT::subgraph peek [ASDOT::subgraph size]]
                }

                # A from node record can be specified.  If one has then
                # store it with the details of the arc
                if { $fromNodeRecord != {} } {
                    $ASDOT::n_graph arc set $newArc -key fromRecord $fromNodeRecord
                }
                
                # Nodes and edges can be specified like this
                #    a0 -> a1 -> a2 ;
                # So copy the toNode to the fromNode in case this is happening
                set fromNode $toNode
            }

            {edgeSpec} "[\[]" {} {
                dot begin edgeSpecAttrList
            }

            {edgeSpec} "[\n]" {} -
            {edgeSpec} ";" {} {
                dot end
            }

            {edgeSpecAttrList} "([a-zA-Z0-9_]+)[ \t]*[=][ \t]*[\"](.*)[\"]" {x name value} {
                # An edge can have attributes associated with it.  This stores the edge
                # attributes into the graph
                set value [string trim [regsub -all {\\\"} $value "\""]]
                $ASDOT::n_graph arc set $newArc -key $name $value
            }

            {edgeSpecAttrList} "[\]]" {} {
                # Reached the end of an edge specification attribute list
                dot end
            }

            {subGraph} "\}" {} {
                # Reached the end of a subgraph so pop the name 
                # from the stack
                ASDOT::subgraph pop
                dot end
            }
        }
    
    #===================================================================
    # read_dot
    #    Read a .dot file, parse its contents and create a Tcl
    #    ::struct::graph from the contents.
    #
    # file - Name of the .dot file to be read
    proc read_dot { file } {

        variable n_graph

        # Open the file and read its contents into the
        # variable gspec
        set df [open $file r]
        set gspec [read $df]

        # Parse the .dot file into a ::struct::graph
        ASDOT::dot eval $gspec

        # Close the file
        close $df
        
        # Return the graph - the caller needs to clean this up
        return $n_graph
    }

    #-------------------------------------------------------------------
    # writeGraphAttr
    #    Check to see if a graph level attribute exists.  If it does 
    #    then write it into the output file
    # df - output file
    # g  - graph
    # pf - prefix this will be blank or one of node,|graph,|edge,
    # attr - attribute name
    proc writeGraphAttr { df g pf attr } {

        if { ![catch {
            set val [$g get -key "${pf}${attr}"]
        }] } {
            puts $df "   $attr = \"$val\""
        }
    }

    #-------------------------------------------------------------------
    # getNodeAttr
    #    A node may have zero, one or many attributes defined for it.
    #    If it has none just return a semicolon.  If it has one or more
    #    the return them in the correct square bracket delimiters
    proc getNodeAttr {g n} {
        
        variable n_nodeAttr

        set attrCnt 0

        set result "   \"${n}\" \[\n"
        foreach attrName $n_nodeAttr {
            if { ![catch {
                set val [$g node get $n -key $attrName]
            }] } {
                append result "      $attrName = \"$val\"\n"
                incr attrCnt
            }
        }
        append result "   \]\n"

        if { $attrCnt == 0 } {
            set result "$n ;"
        }
        
        return $result
    }

    #-------------------------------------------------------------------
    # getEdgeAttr
    #    An edge may have zero, one or many attributes defined for it.
    #    If it has none just return a semicolon.  If it has one or more
    #    the return them in the correct square bracket delimiters
    proc getEdgeAttr {g e} {
        
        variable n_edgeAttr

        set srcNode [$g arc source $e]
        set destNode [$g arc target $e]

        set attrCnt 0

        set arcDef "   \"${srcNode}\""
        # Check to see if a from node record has been specfied
        if { ![catch {
            set val [$g arc get $e -key "fromRecord"]
        }] } {
            append arcDef ":${val}"
        }

        append arcDef " -> "
        append arcDef "\"${destNode}\""
        # Check to see if a to node record has been specfied
        if { ![catch {
            set val [$g arc get $e -key "toRecord"]
        }] } {
            append arcDef ":${val}"
        }

        set result "$arcDef \[\n"

         foreach attrName $n_edgeAttr {
            if { ![catch {
                set val [$g arc get $e -key $attrName]
            }] } {
                append result "      $attrName = \"$val\"\n"
                incr attrCnt
            }
        }
        append result "   \]\n"

        if { $attrCnt == 0 } {
            set result "$arcDef ;"
        }
        
        return $result
    }

    #-------------------------------------------------------------------
    # lreverse
    #    Reverse the order of the elements in a list
    # l - list to reverse
    proc lreverse { l } {

        set result {}

        set ll [expr [llength $l] -1]
        for {set i $ll} {$i >= 0} {incr i -1} {
            lappend result [lindex $l $i]
        }
        
        return $result
    }

    #-------------------------------------------------------------------
    # sg_sort
    #    Decide the order of two elements of the with_sg list
    # e1 - element 1
    # e2 - element 2
    proc sg_sort { e1 e2 } {
        set e1_sg [lindex $e1 0]
        set e2_sg [lindex $e2 0]
        return [string compare $e1_sg $e2_sg]
    }

    #===================================================================
    # write_dot
    #
    # file - Name of the file to write the graph format data out to
    # graph - The graph to be written to the file
    proc write_dot { file graph } {

        variable n_nodeAttr
        variable n_edgeAttr
        variable n_graphAttr

        # Open the file for output
        set df [open $file w]

        # Write the header information into the graph
        puts $df "digraph \"[set graph]\" \{\n"

        # Write out any graph level attributes
        foreach attrName $n_graphAttr {
            writeGraphAttr $df $graph "" $attrName
        }

        # Write out the graph section where graph attributes
        # can also be specified
        puts $df "   graph \["
        foreach attrName $n_edgeAttr {
            writeGraphAttr $df $graph "graph," $attrName
        }
        puts $df "   \]\n"

        # Write out the node section where attributes for all
        # nodes are specified
        puts $df "\n   node \["
        foreach attrName $n_nodeAttr {
            writeGraphAttr $df $graph "node," $attrName
        }
        puts $df "   \]\n"

        # Write out the edge section where attributes for all
        # edges are specified
        puts $df "   edge \["
        foreach attrName $n_edgeAttr {
            writeGraphAttr $df $graph "edge," $attrName
        }
        puts $df "   \]\n"

        # Now write out the details of each node
        foreach node [lsort -dictionary [$graph nodes]] {
            puts $df [getNodeAttr $graph $node]
        }

        # Now write out the details of each arc.  This process is complicated
        # by the need to cater for subgraphs
        set with_sg {}
        set no_sg {}
        foreach arc [$graph arcs] {
            
            # First partition the arcs into those that do and those that
            # do not have subgraphs
            if { [catch {
                set sg [$graph arc get $arc -key "subgraph"]
            } ] } {
                lappend no_sg $arc
            } else {
                lappend with_sg [list [lreverse $sg] $arc]
            }
        }

        # Write out the arcs that ar not involved in a subgraph
        foreach arc $no_sg {
            puts $df "[getEdgeAttr $graph $arc]"
        }

        if { [llength $with_sg] > 0 } {

            # Now write out the arcs that are involved in a subgraph
            set last_sg {}
            set last_level 0
            
            foreach sg_arc [lsort -command sg_sort $with_sg] {
                set sg [lindex $sg_arc 0]
                set arc [lindex $sg_arc 1]
                set level [llength $sg]

                #puts $df "$last_level --> $level"
                #puts $df "$last_sg --> $sg"

                if { ! [string equal $last_sg $sg] } {
                    if { $level > $last_level } {
                        puts $df "   subgraph [lindex $sg end] \{"                
                    } elseif { $level == $last_level } {
                        puts $df "   subgraph [lindex $sg end] \{"                
                        if { $level > 0 } {
                            puts $df "   \}"
                        }
                    } elseif { $level < $last_level } {
                        
                        # Close of the difference in levels plus 1
                        set closeLevels [expr $last_level - $level + 1]
                        for {set i 0} {$i < $closeLevels} {incr i} {
                            puts $df "   \}"
                        }
                        # Start a new subgraph
                        puts $df "   subgraph [lindex $sg end] \{"                
                    }
                }

                puts $df "[getEdgeAttr $graph $arc]"
                
                set last_sg $sg
                set last_level $level
            }
            # Close of the last remaining subgraphs
            set closeLevels [expr $last_level - $level + 1]
            for {set i 0} {$i < $closeLevels} {incr i} {
                puts $df "   \}"
            }
        }
        # Write the trailing bracket into the graph
        puts $df "\}"
        
        # Close the file
        close $df
        
    }
 }