----
#Copyright 2003 George Peter Staplin
#You may use this under the same terms as Tcl.
#A Mini Tcl Parser in Tcl.
#version 5
interp alias {} loop {} while 1
set s {
A [B 123] [C 456 [C2 789]]
set x [+ 20 [* 30 200]]
list a b {a b} $c {$c} [d e; f g]
}
if 0 {
#We need to have autoloading source the parray code.
array set ar {}
parray ar
append s "\n[info body parray]"
}
#1 or 0
proc get.token {s iPtr tokPtr typePtr} {
upvar $iPtr i
upvar $tokPtr tok
upvar $typePtr type
set tok ""
set sLen [string length $s]
set lastChar ""
#unknown, brace, bracket, quote, end
set type unknown
set braceCount 0
set bracketCount 0
set quote 0
for {} {$i < $sLen} {incr i} {
set c [string index $s $i]
if {$braceCount} {
if {("\{" == $c) && ("\\" != $lastChar)} {
incr braceCount
} elseif {("\}" == $c) && ("\\" != $lastChar)} {
incr braceCount -1
}
if {0 == $braceCount} {
incr i
return 1
}
append tok $c
} elseif {$bracketCount} {
if {("\[" == $c) && ("\\" != $lastChar)} {
incr bracketCount
} elseif {("\]" == $c) && ("\\" != $lastChar)} {
incr bracketCount -1
}
if {0 == $bracketCount} {
incr i
return 1
}
append tok $c
} elseif {$quote} {
if {("\"" == $c) && ("\\" != $lastChar)} {
incr i
return 1
}
append tok $c
} else {
if {("\{" == $c) && ("\\" != $lastChar)} {
set type brace
incr braceCount
} elseif {("\[" == $c) && ("\\" != $lastChar)} {
set type bracket
incr bracketCount
} elseif {("\"" == $c) && ("\\" != $lastChar)} {
set type quote
set quote 1
} elseif {(" " == $c) || ("\t" == $c)} {
if {[string length $tok]} {
return 1
}
} elseif {("\n" == $c) || ("\r" == $c) || (";" == $c)} {
if {[string length $tok]} {
return 1
} else {
set type end
set tok $c
incr i
return 1
}
} else {
append tok $c
}
}
set lastChar $c
}
if {"unknown" ne $type} {
puts stderr "incomplete command: still in state of $type"
return 0
}
if {[string length $tok]} {
return 1
}
return 0
}
set ::level 0
proc parse s {
global level
set i 0
set tok ""
set type ""
loop {
set r [get.token $s i tok type]
if {!$r} break
#puts "TOK:$tok TYPE:$type"
if {"end" == $type} {
puts [string repeat " " $level]SEP
} elseif {"bracket" == $type} {
incr level
puts [string repeat " " $level]BRACK
parse $tok
incr level -1
} else {
puts [string repeat " " $level]TOK:$tok
}
}
}
proc main {} {
parse $::s
}
mainExample output:
$ tclsh85g.exe mini_tcl_parser-3.tcl
TOK:A
TOK:B
TOK:123
TOK:C
TOK:456
TOK:C2
TOK:789
TOK:set
TOK:x
TOK:+
TOK:20
TOK:*
TOK:30
TOK:200Lars H (19 aug 2003): It seems a bit simplistic. Consider:
% parse {list a b {a b} $c {$c} [d e; f g]}
TOK:list
TOK:a
TOK:b
TOK:a b
TOK:$c
TOK:$c
TOK:d
TOK:e
TOK:f
TOK:gThat $c and {$c} are the same can be an artifact of the way that the parse procedure presents the result (type information not shown), but the level of the f and g tokens is simply wrong.GPS: I've fixed this problem. Thanks for pointing it out. Here's the output with version 5:
$ tclsh85g.exe mini_tcl_parser-5.tcl SEP TOK:A BRACK TOK:B TOK:123 BRACK TOK:C TOK:456 BRACK TOK:C2 TOK:789 SEP TOK:set TOK:x BRACK TOK:+ TOK:20 BRACK TOK:* TOK:30 TOK:200 SEP TOK:list TOK:a TOK:b TOK:a b TOK:$c TOK:$c BRACK TOK:d TOK:e SEP TOK:f TOK:g SEP
Information for people who are interesting in parsing Tcl in Tcl. XOTclIDE have own Tcl Parser (Component IDETclParser) that is implemented object oriented in XOTcl. It is used to make syntax hightligting and syntax checker. The Parse Tree is build as nested XOTcl Objects that can be directly used to make operations on it.The XOTcIDE Parser also parse scripts from known Tcl commands. For example command (foreach variable list script). The parser knows that the last element of command is script and parse it as script. In this case the parser tries to emulate Tcl interpreter. It knows how some commands work. For example it knows that (set a 32) will define new variable a. Of course it will not work for special Tcl scripts as
set a {puts script}
if {$condition} $aBut normal case is if {$condition} {puts script}For such scripts the syntax checker can proof "puts" to be valid command.The nature of Tcl make it very easy to build parser. But only for "first evalutation level". At this level there are only commands and words. (Also internal Tcl Parser knows special commands as if and for and parse command arguments (words) as scripts)
