see also edit
- scriptSplit, by dgp
- not a complete parser, but may be a lightweight solution to a class of uses for tcl parsing.
description edit
my original need for it was for cross-referencing tcl code (to generate an index which answers the question: on which lines is this command/variable/whatever used?), but there are many other interesting applications.one is to write a command that works like a "proc with preprocessor": the body is parsed, then the preprocessor examines the code (probably replacing some special construction, e.g. syntactic sugar with the corresponding byte-compilable raw tcl), the result is translated back to a tcl script, and finally that is given as the body to the normal proc command. i currently (2003-08-19) haven't implemented the "back" (from parser output format to tcl script) part of this, but the parser is strict enough that this should be possible.the output format of the parser is something which i call a "parser tree". it has the format- type interval text [subtree ...]
- rs
- root for a parsed script.
- rx
- root for a parsed expression (not implemented yet).
- cd
- a command (subtrees are the words).
- sv
- scalar variable substitution.
- sa
- array variable substitution.
- sc
- command substitution.
- sb
- backslash substitution.
- lr
- raw ("literate") text.
- lq
- raw text, in quotes.
- lb
- raw text, in braces.
- mr
- text of which a part is generated by substitution.
- mq
- like mr, but in quotes.
- nc
- a comment.
- ne
- a syntax error detected by the parser.
- np
- a "placeholder" (used internally).
code edit
## ## this is file `parsetcl.tcl', ## generated with the docstrip utility. ## ## the original source files were: ## ## parsetcl.dtx (with options: `pkg') ## ## in other words: ## *************************************** ## * this source is not the true source. * ## *************************************** ## the true source is parsetcl.dtx in ## http://ctan.org/tex-archive/macros/latex/contrib/tclldoc/examples ## ## (c) 2003 lars hellstr\"om ## ## it is preferred that you apply the distribution and modification ## conditions of the latex project public license (lppl) for this file, ## but you may alternatively choose to apply bsd/tcl-style license ## conditions (either is ok). the latest version of the lppl is in ## http://www.latex-project.org/lppl.txt ## and version 1.2 or later is part of all distributions of latex ## version 1999/12/01 or later. ## namespace eval parsetcl {} package require tcl 8.4 package provide parsetcl 0.1 proc parsetcl::flush_whitespace {script index_var cmdsep} { upvar 1 $index_var index if {[ if {$cmdsep} then { regexp -start $index -- {\a([ \t-\r;]|\\\n)+} $script match } else { regexp -start $index -- {\a([ \t\v\f\r]|\\\n)+} $script match } ]} then { incr index [string length $match] return [string length $match] } else { return 0 } } proc parsetcl::parse_command {script index_var nested} { upvar 1 $index_var index flush_whitespace $script index 1 switch -- "[string index $script $index]$nested" {#0} - {#1} { regexp -start $index -indices -- {\a#([^\n\\]|\\.)*(\\$)?} \ $script interval incr index regsub -all -- {\\\n[ \t]*}\ [string range $script $index [lindex $interval 1]]\ { } text set index [expr {[lindex $interval 1] + 1}] return [list nc $interval $text] } 0 - 1 - \]1 { return [list np {} {}] } set res [list cd [list $index {}] {}] set next [parse_word $script index $nested] while {[lindex $next 0] ne "np"} { lappend res $next set next [parse_word $script index $nested] } lset res 1 1 [lindex $res end 1 1] return $res } proc parsetcl::basic_parse_script {script} { set index 0 set res [list rs [list $index {}] {}] while {[lindex [set next [parse_command $script index 0]] 0] ne "np"} { lappend res $next } incr index -1 lset res 1 1 $index return $res } proc parsetcl::parse_word {script index_var nested} { upvar 1 $index_var index switch -- [string index $script $index] \{ { parse_braced_word $script index $nested } \" { parse_quoted_word $script index $nested } {} - \; - \n { list np {} {} } \] { if {$nested} then { list np {} {} } else { parse_raw_word $script index $nested } } default { parse_raw_word $script index $nested } } proc parsetcl::parse_braced_word {script index_var nested} { upvar 1 $index_var index set res [list lb [list $index {}]] set depth 1 set text {} incr index while {$depth>0} { regexp -start $index -- {\a([^{}\\]|\\[^\n])*} $script match append text $match incr index [string length $match] switch -- [string index $script $index] \{ { incr depth incr index append text \{ } \} { incr depth -1 incr index if {$depth} { append text \} } } \\ { if {[regexp -start $index -- {\a\\\n[ \t]*} $script match]}\ then { incr index [string length $match] append text { } } else { append text \\ break } } {} { break } } if {$depth>0} then { lset res 1 1 $index lappend res $text [list ne [list {} $index] {missing close-brace}] lset res 3 1 0 [incr index] return $res } lset res 1 1 [expr {$index - 1}] lappend res $text if {[flush_whitespace $script index 0]} then {return $res} switch -- [string index $script $index] \n - \; - {} { return $res } \] { if {$nested} then {return $res} } lappend res [list ne [list $index [expr {$index - 1}]]\ {missing space after close-brace}] return $res } proc parsetcl::parse_quoted_word {script index_var nested} { upvar 1 $index_var index set res [list lq [list $index {}] {}] set text {} incr index while {1} { switch -- [string index $script $index] \\ { lappend res [parse_backslash $script index] append text [lindex $res end 2] } \$ { lappend res [parse_dollar $script index] lset res 0 mq } \[ { lappend res [parse_bracket $script index] lset res 0 mq } \" { incr index break } {} { lappend res [list ne [list $index [expr {$index - 1}]]\ {missing close-quote}] break } default { regexp -start $index -- {[^\\$\["]*} $script match set t $index incr index [string length $match] lappend res [list lr [list $t [expr {$index - 1}]] $match] append text $match } } lset res 1 1 [expr {$index - 1}] if {[lindex $res 0] eq "lq"} then { lset res 2 $text if {[llength $res] == 4 && [lindex $res 3 0] eq "lr"} then { set res [lrange $res 0 2] } } if {[flush_whitespace $script index 0]} then {return $res} switch -- [string index $script $index] \n - \; - {} { return $res } \] { if {$nested} then {return $res} } lappend res [list ne [list $index [expr {$index - 1}]]\ {missing space after close-quote}] return $res } proc parsetcl::parse_raw_word {script index_var nested} { upvar 1 $index_var index set res [list] set type lr set interval [list $index] set text {} while {1} { switch -- [string index $script $index] \\ { if {[string index $script [expr {$index+1}]] eq "\n"} then { break } lappend res [parse_backslash $script index] append text [lindex $res end 2] continue } \$ { lappend res [parse_dollar $script index] set type mr continue } \[ { lappend res [parse_bracket $script index] set type mr continue } \t - \n - \v - \f - \r - " " - \; - {} { break } if {$nested} then { if {![ regexp -start $index -- {\a[^\\$\[\]\t-\r ;]+} $script match ]} then {break} } else { regexp -start $index -- {\a[^\\$\[\t-\r ;]+} $script match } set t $index incr index [string length $match] lappend res [list lr [list $t [expr {$index - 1}]] $match] append text $match } if {[llength $res]==1} then { set res [lindex $res 0] } else { lappend interval [expr {$index - 1}] if {$type ne "lr"} then {set text {}} set res [linsert $res 0 $type $interval $text] } flush_whitespace $script index 0 return $res } proc parsetcl::parse_backslash {script index_var} { upvar 1 $index_var index set start $index incr index set ch [string index $script $index] set res [list lr [list $index $index] $ch] switch -- $ch a { set res [list sb [list $start $index] \a $res] } b { set res [list sb [list $start $index] \b $res] } f { set res [list sb [list $start $index] \f $res] } n { set res [list sb [list $start $index] \n $res] } r { set res [list sb [list $start $index] \r $res] } t { set res [list sb [list $start $index] \t $res] } v { set res [list sb [list $start $index] \v $res] } x { if {[regexp -start [expr {$index + 1}] -- {\a[0-9a-fa-f]+}\ $script match]} then { scan [string range $match end-1 end] %x code incr index [string length $match] lset res 1 1 $index lset res 2 "x$match" set res [list sb [list $start $index]\ [format %c $code] $res] } else { set res [list sb [list $start $index] x $res] } } u { if {[regexp -start [expr {$index + 1}] -- {\a[0-9a-fa-f]{1,4}}\ $script match]} then { scan $match %x code incr index [string length $match] lset res 1 1 $index lset res 2 "u$match" set res [list sb [list $start $index]\ [format %c $code] $res] } else { set res [list sb [list $start $index] u $res] } } \n { regexp -start [expr {$index + 1}] -- {\a[ \t]*} $script match incr index [string length $match] lset res 1 1 $index lset res 2 "\n$match" set res [list sb [list $start $index] " " $res] } {} { return [list sb [list $start $start] \\] } default { if {[regexp -start $index -- {\a[0-7]{1,3}} $script match]} then { scan $match %o code incr index [expr {[string length $match]-1}] lset res 1 1 $index lset res 2 $match set res [list sb [list $start $index] [format %c $code] $res] } else { set res [list sb [list $start $index] $ch $res] } } incr index return $res } proc parsetcl::parse_bracket {script index_var} { upvar 1 $index_var index set res [list sc [list $index {}] {}] incr index while {[lindex [set next [parse_command $script index 1]] 0] ne "np"} { lappend res $next } if {[string index $script $index] eq "\]"} then { lset res 1 1 $index incr index return $res } else { lappend res [list ne [list $index [expr {$index-1}]]\ {missing close-bracket}] lset res 1 1 [expr {$index-1}] return $res } } set parsetcl::varname_re {\a(\w|::)+} proc parsetcl::parse_dollar {script index_var} { upvar 1 $index_var index set res [list {} [list $index {}] {}] incr index if {[string index $script $index] eq "\{"} then { lset res 0 sv set end [string first \} $script $index] if {$end<0} then { set end [expr {[string length $script] - 1}] lappend res [list lb [list $index $end]\ [string range $script [expr {$index + 1}] end]]\ [list ne [list [expr {$end+1}] $end]\ {missing close-brace for variable name}] } else { lappend res [list lb [list $index $end]\ [string range $script [expr {$index + 1}] [expr {$end-1}]]] } lset res 1 1 $end set index [expr {$end + 1}] return $res } variable varname_re if {![regexp -start $index -- $varname_re $script match]} then { if {[string index $script $index] eq "("} then { set match {} } else { return [list lr [list [lindex $res 1 0] [lindex $res 1 0]] \$] } } set t $index incr index [string length $match] lappend res [list lr [list $t [expr {$index-1}]] $match] if {[string index $script $index] ne "("} then { lset res 0 sv lset res 1 1 [lindex $res 3 1 1] return $res } lset res 0 sa incr index set subres [list lr [list $index {}] {}] lappend res {} set text {} while {1} { switch -- [string index $script $index] \\ { lappend subres [parse_backslash $script index] append text [lindex $subres end 2] } \$ { lappend subres [parse_dollar $script index] lset subres 0 mr } \[ { lappend subres [parse_bracket $script index] lset subres 0 mr } ) { lset subres 1 1 [expr {$index - 1}] break } {} { lappend res\ [list ne [list $index [incr index -1]] {missing )}] lset subres 1 1 $index break } default { regexp -start $index -- {[^\\$\[)]*} $script match set t $index incr index [string length $match] lappend subres [list lr [list $t [expr {$index - 1}]] $match] append text $match } } if {[lindex $subres 0] eq "lr"} then {lset subres 2 $text} if {[llength $subres] == 4} then {set subres [lindex $subres 3]} lset res 1 1 $index incr index lset res 4 $subres return $res } # # the following are utility procedures: # proc parsetcl::format_tree {tree base step} { set res $base append res \{ [lrange $tree 0 1] { } if {[regexp {[\n\r]} [lindex $tree 2]]} then { append res [string range [list "[lindex $tree 2]\{"] 0 end-2] } else { append res [lrange $tree 2 2] } if {[llength $tree]<=3} then { append res \} return $res } elseif {[llength $tree] == 4 &&\ [string match {s[bv]} [lindex $tree 0]]} then { append res { } [format_tree [lindex $tree 3] {} {}] \} return $res } append res \n foreach subtree [lrange $tree 3 end] { append res [format_tree $subtree $base$step $step] \n } append res $base \} } proc parsetcl::offset_intervals {tree offset} { set res [lrange $tree 0 2] foreach i {0 1} { lset res 1 $i [expr {[lindex $res 1 $i] + $offset}] } foreach subtree [lrange $tree 3 end] { lappend res [offset_intervals $subtree $offset] } return $res } proc parsetcl::reparse_lb_as_script {tree_var index parsed} { upvar 1 $tree_var tree set node [lindex $tree $index] switch -- [lindex $node 0] lb - lr - lq { set base [expr {[lindex $node 1 0] + 1}] if {[lindex $node 0] eq "lb"} then { set script [string range $parsed $base\ [expr {[lindex $node 1 1] - 1}]] } else { set script [lindex $node 2] } lset tree $index\ [offset_intervals [basic_parse_script $script] $base] if {[lindex $node 0] eq "lb"} then { return 2 } else { return 1 } } default { return 0 } } proc parsetcl::walk_tree {tree_var index_var args} { upvar 1 $tree_var tree $index_var idxl set idxl [list] set i 0 while {$i>=0} { if {$i==0} then { uplevel 1 [list switch -regexp --\ [lindex [lindex $tree $idxl] 0] $args] set i 3 } elseif {$i < [llength [lindex $tree $idxl]]} then { lappend idxl $i set i 0 } elseif {[llength $idxl]} then { set i [lindex $idxl end] set idxl [lrange $idxl 0 end-1] incr i } else { set i -1 } } } proc parsetcl::simple_parse_script {script} { set tree [parsetcl::basic_parse_script $script] walk_tree tree indices cd { switch -- [lindex [lindex $tree $indices] 3 2] if { for {set i 3} {$i < [llength [lindex $tree $indices]]}\ {incr i} { switch -- [lindex [lindex $tree $indices] $i 2]\ if - elseif { incr i; continue } then - else { incr i } parsetcl::reparse_lb_as_script tree\ [linsert $indices end $i] $script } } while { parsetcl::reparse_lb_as_script tree [linsert $indices end 5]\ $script } for { parsetcl::reparse_lb_as_script tree [linsert $indices end 4]\ $script parsetcl::reparse_lb_as_script tree [linsert $indices end 6]\ $script parsetcl::reparse_lb_as_script tree [linsert $indices end 7]\ $script } foreach { parsetcl::reparse_lb_as_script tree [linsert $indices end end]\ $script } catch { parsetcl::reparse_lb_as_script tree [linsert $indices end 4]\ $script } proc { parsetcl::reparse_lb_as_script tree [linsert $indices end 6]\ $script } } return $tree } ## ## ## end of file `parsetcl.tcl'.if 0 { as an example of how this works, consider applying parsetcl::simple_parse_script to the body of the parray procedure: }
auto_load parray parsetcl::simple_parse_script [info body parray]this returns a long list whose structure is rather hard to follow. however, the utility procedure parsetcl::format_tree can make the structure more visible. the command
parsetcl::format_tree [parsetcl::simple_parse_script [info body parray]] { } { }returns
{rs {0 467} {} {cd {5 20} {} {lr {5 9} upvar} {lr {11 11} 1} {sv {13 14} {} {lr {14 14} a}} {lr {16 20} array} } {cd {26 90} {} {lr {26 27} if} {lb {29 51} {![array exists array]}} {rs {54 89} {} {cd {56 84} {} {lr {56 60} error} {mq {62 84} {} {sb {63 64} {"} {lr {64 64} {"}}} {sv {65 66} {} {lr {66 66} a}} {sb {67 68} {"} {lr {68 68} {"}}} {lr {69 83} { isn't an array}} } } } } {cd {96 105} {} {lr {96 98} set} {lr {100 103} maxl} {lr {105 105} 0} } {cd {111 244} {} {lr {111 117} foreach} {lr {119 122} name} {sc {124 159} {} {cd {125 158} {} {lr {125 129} lsort} {sc {131 158} {} {cd {132 157} {} {lr {132 136} array} {lr {138 142} names} {lr {144 148} array} {sv {150 157} {} {lr {151 157} pattern}} } } } } {rs {162 243} {} {cd {164 238} {} {lr {164 165} if} {lb {167 197} {[string length $name] > $maxl}} {rs {200 237} {} {cd {206 235} {} {lr {206 208} set} {lr {210 213} maxl} {sc {215 235} {} {cd {216 234} {} {lr {216 221} string} {lr {223 228} length} {sv {230 234} {} {lr {231 234} name}} } } } } } } } {cd {250 297} {} {lr {250 252} set} {lr {254 257} maxl} {sc {259 297} {} {cd {260 296} {} {lr {260 263} expr} {lb {265 296} {$maxl + [string length $a] + 2}} } } } {cd {303 466} {} {lr {303 309} foreach} {lr {311 314} name} {sc {316 351} {} {cd {317 350} {} {lr {317 321} lsort} {sc {323 350} {} {cd {324 349} {} {lr {324 328} array} {lr {330 334} names} {lr {336 340} array} {sv {342 349} {} {lr {343 349} pattern}} } } } } {rs {354 465} {} {cd {356 394} {} {lr {356 358} set} {lr {360 369} namestring} {sc {371 394} {} {cd {372 393} {} {lr {372 377} format} {lr {379 384} %s(%s)} {sv {386 387} {} {lr {387 387} a}} {sv {389 393} {} {lr {390 393} name}} } } } {cd {397 460} {} {lr {397 400} puts} {lr {402 407} stdout} {sc {409 460} {} {cd {410 459} {} {lr {410 415} format} {lq {417 427} {%-*s = %s}} {sv {429 433} {} {lr {430 433} maxl}} {sv {435 445} {} {lr {436 445} namestring}} {sa {447 459} {} {lr {448 452} array} {sv {454 458} {} {lr {455 458} name}} } } } } } } }this is horrible reading, but much easier for a program to do things with than the original script.
discussion edit
jjs: it would be enormously helpful if you provided a script which took the parse tree and turned it back into the original code. doing such an identity transformation on a large group of tcl scripts would be a very effective unit test for your code, and having that transformation script as a starting point would make it much simpler for folks looking to make use of your code. i realize you acknowledge that'd be a useful next step, but consider this encouragement to actually follow through :-).here's a partial inversion of the parser - cmcc
namespace eval parsetcl {} proc ::parsetcl::unparse {tree} { eval $tree } proc ::parsetcl::lr {interval text args} { return $text } proc ::parsetcl::lb {interval text args} { return \{$text\} } proc ::parsetcl::lq {interval text args} { return \"$text\" } proc ::parsetcl::sb {interval text args} { return "\\$text" } proc ::parsetcl::sv {interval text args} { return "\$[eval [lindex $args 0]]" } proc ::parsetcl::sa {interval text args} { foreach a [lrange $args 1 end] { append result [eval $a] } return "\$[eval [lindex $args 0]]($result)" } proc ::parsetcl::sc {interval text args} { foreach a $args { append cmd " " [eval $a] } return \[${cmd}\] } proc ::parsetcl::mr {interval text args} { foreach a $args { append result [eval $a] } return ${result} } proc ::parsetcl::mq {interval text args} { foreach a $args { append result [eval $a] } return \"${result}\" } proc ::parsetcl::cd {interval text args} { foreach a $args { append cmd [eval $a] " " } return ${cmd} } proc ::parsetcl::rs {interval text args} { foreach a $args { append cmd [eval $a]\n } return \{\n$cmd\n\} }
tv 2004-05-12: interesting page and comment. reminds me of a text-to-3dobject_database program i once made and then added a text generator for editing hierarchical 3d object source files after the oo rep got transformed. which leads me to the questions: is the parser complete enough ? that could make it interesting to visualize the results in bwise, and possibly lead to speed improvements and compilation analysis/speedup.
am 2005-03-13: i used the above code to create a prototype of a tool to generate a "call-tree". while it is far from complete, it does show the capabilities (as far as i am concerned) of such a parser.other uses i can think of:
- slicing - flesh out those parts of the code influenced by a particular variable
- instrumentation for test coverage
- generating structure graphs (or whatever the appropriate name is) that show the complexity of a procedure
- determining all kinds of metrics
- the simple_parse_script procedure does not parse [switch] bodies lars h: yes, i know. what prevented me from adding that capability was that then i would also need a list parser (in one form of switch, the bodies are elements of a list), and at the time i didn't have the time to write that. anyway simple_parse_script was never meant to be "the real thing", but only a testing aid and proof of concept. -- am i quite understand. i realised the other day that another construct that is missing is [uplevel], which behaves more or less like a [foreach] loop in the sense of the parser. it is just so close to the "real thing" that one starts to complain about everything :) -- lars h: uplevel is really most like eval (i think at the byte-code level eval is equivalent to uplevel 0), which in general is anther can of worms. my uplevels are quite often on the form "uplevel 1 [list ::set $var $val]"...
- another thing that needs looking into, i guess, is that not all parsed code is accepted by the [format_tree] procedure. i have not looked into this more closely, but the list commands complained over at least one piece of code i have. -- lars h: rereading the code, i cannot see why. please give an example. - am i must have made a very stupid mistake (like passing the original code, instead of the parse result) as i could not reproduce it.
lars h, 2005-04-02: it occurs to me that this should probably be contributed to tcllib, but right now i have other things to do. maybe next month. if in the meantime someone wants to contribute however, then some tests would be nice.
[balaji] 2006-04-21: thanks for the great tool! i was wondering if there is a way to preserve the lines #s and line breaks as part of the parse tree. this would greatly help re-creating the structure from the parsed tree. i'd appreciate if you can provide some direction in how to go about doing this.lars h: comments (if that's what "#s" means) are preserved by default, so that shouldn't be a problem. line breaks (and subsequent indentation) disappear between commands, but it shouldn't be too hard to reinsert them in a post-processing step.
regexp -all -indices -inline {\n} $scriptwill return a list of all positions of newlines in the script. you can compare this to the intervals of cd nodes in the tree, and that way find out where the newlines between commands are.come to think of it, the following is probably the easiest way to get what you want
set tree [parsetcl::simple_parse_script $script] set nll [regexp -all -inline -indices {\n\s*} $script] parsetcl::walk_tree tree where rs { # all rs nodes are rebuilt set newnode [lrange [lindex $tree $where] 0 2] set subtreel [lrange [lindex $tree $where] 3 end] # add ni nodes for each newline in the interval of this rs node set first [lindex $newnode 1 0] set last [lindex $newnode 1 1] foreach interval $nll { if {[lindex $interval 0] >= $first && [lindex $interval 1] <= $last} then { lappend subtreel [list ni $interval\ [eval [linsert $interval 0 string range $script]]] } } # sort the nodes by starting position. # skip those nodes which are contained within an earlier node; # they're ni nodes which don't belong as children of this rs node # (they might belong as more distant descendants, but then we'll # take care of them when we get to the descendant rs node). set last -1 foreach subtree [lsort -dictionary -index 1 $subtreel] { if {[lindex $subtree 1 1] > $last} then { lappend newnode $subtree set last [lindex $subtree 1 1] } } lset tree $where $newnode }if you're really strict about it, you should probably give the same treatment to sc nodes as to the rs nodes, but i suspect you're looking more for the exampe than for the rigorous solution.
escargo 2008-02-19: this is not one of the most transparent pieces of code that i've seen. what i would like to do is get the parse tree out of its internal tree format and into something more like a struct::tree. failing that, a more intuitive (or documented) way of walking the tree would be appreciated.lars h: well, as the comments say, this is not the true source for this piece of code; the true source (link above updated 2008-02-20) does explain syntax and calling conventions for the various procedures. (of course, the .dtx format may be unfamiliar. see docstrip, docstrip and tclldoc for details.)using struct::tree for this kind of tree feels like overkill to me (why should one name all the nodes of a tree, especially a parse tree?), but then i'm more functional than object-oriented in style. for what it's worth, the data is code technique makes it straightforward to convert this tree format to another format.escargo 2008-02-20: ah, the light dawns. so if i define procedures with the names rs, nc, etc., then i can just [eval] the tree and do whatever transforms i want.still, it's at least possible. that might make it clearer when i look at the code.i don't happen to have latex available, so i can't run the input files through to produce the output files. it might have been a kindness to make the outputs available as well, i have what i have.(later in the day) - i've made progress with decoding the tree. the only further things i wish were documented here are:
- which types are leaf nodes and which are interior nodes
- which type have constant arity (especially if it's exactly one) and which are variable
{rs {0 4} {} {cd {0 4} {} {mr {0 4} {} {sv {0 2} {} {lr {1 2} ::}} {lr {3 4} :a} } } }whereas it should be parsed as
{rs {0 4} {} {cd {0 4} {} {sv {0 4} {} {lr {1 4} :::a}} } }i verified this with the namespace man pages and a plain tcl 8.4: two or more adjancent colons are treated as one namespace separator.
cmcc 2010-06-04 02:30:49:i've just created a program based on the latest parsetcl which parses a tcl-like configuration language. it's here: configuration language using parsetcl, and was much easier and more satisfying than trying to use unknown or interp to achieve the same end.
cmcc 2013-11-11:slight modification to handle switch both old and new syntax.
proc ::parsetcl::simple_parse_script {script} { set tree [basic_parse_script $script] walk_tree tree indices {^cd$} { switch -- [lindex [lindex $tree $indices] 3 2] { if { for {set i 3} {$i < [llength [lindex $tree $indices]]} {incr i} { switch -- [lindex $tree {*}$indices $i 2] { if - elseif { incr i reparse_lb_as_mb tree [linsert $indices end $i] $script continue } then - else { incr i } } reparse_lb_as_script tree [linsert $indices end $i] $script } } switch { set i 4 while {$i < [llength [lindex $tree $indices]]} { # skip options set el [lindex $tree {*}$indices $i 2] incr i if {![string match -* $el]} break if {$el eq "--"} break } incr i if {$i == [llength [lindex $tree $indices]]-1} { # new-style switch puts stderr "sw-new style [lindex $tree {*}$indices end]" # step 1 reparse as mb reparse_lb_as_mb tree [linsert $indices end end] $script for {set j 4} {$j < [llength [lindex $tree {*}$indices end]]} {incr j 2} { set el [lindex $tree {*}$indices end $j] if {$el ne "-"} { reparse_lb_as_script tree [linsert $indices end end $j] $script } } } else { # old-style switch - we're just trusting it's even-length while {$i < [llength [lindex $tree $indices]]} { set el [lindex $tree {*}$indices $i] if {$el ne "-"} { reparse_lb_as_script tree [linsert $indices end $i] $script } incr i 2 ;# to next pair } } } while { reparse_lb_as_mb tree [linsert $indices end 4] $script reparse_lb_as_script tree [linsert $indices end 5] $script } for { reparse_lb_as_script tree [linsert $indices end 4] $script reparse_lb_as_mb tree [linsert $indices end 5] $script reparse_lb_as_script tree [linsert $indices end 6] $script reparse_lb_as_script tree [linsert $indices end 7] $script } foreach { reparse_lb_as_script tree [linsert $indices end end] $script } catch { reparse_lb_as_script tree [linsert $indices end 4] $script } proc { reparse_lb_as_script tree [linsert $indices end 6] $script } expr { for {set i 4} {$i < [llength [lindex $tree $indices]]} {incr i} { reparse_lb_as_mb tree [linsert $indices end $i] $script } } } } return $tree }