Thinking about the dodekalogue, so I threw this together. It's a Tcl parser in Tcl. It tries to follow the dodekalogue assiduously.
# tclParser.tcl - a naive first principles tcl parser
oo::class create Parser {
variable rest next accum done count endc escaped
method next {} {
incr count
set next [string index $rest 0]
append accum $next
set rest [string range $rest 1 end]
set done [expr {[string length $rest] == 0}]
return $next
}
method pushback {char} {
set rest $char$rest
}
method skip {{chars 1}} {
incr count $chars
set result [string range $rest 0 $chars-1]
set rest [string range $rest $chars end]
set done [expr {[string length $rest] == 0}]
return $result
}
method lookahead {{chars 1}} {
set done [expr {[string length $rest] <= $chars}]
return [string range $rest 0 $chars-1]
}
method inject {ch} {
append accum $ch
}
method consume {} {
set a $accum
set accum ""
return $a
}
method comment {} {
while {!$done && [my skip] ne "\n"} {}
}
method bsnl {} {
if {[my lookahead] eq "\n"} {
while {[string is space -strict [my lookahead]]} {my skip}
my inject " "
}
}
method goatse {eoc} {
my skip 3
return [list goatse [my cword $eoc]]
}
method brace {} {
set level -1
while {[set ch [my lookahead]] ne ""} {
switch -exact -- $ch {
\{ {
my next
incr level
}
\} {
my next
incr level -1
if {$level < 0} {
set result [string range [my consume] 1 end-1] ;# strip braces
return [list brace $result]
}
}
\\ {
if {[my lookahead 2] eq "\\\n"} {
my skip ;# skip the backslash
my bsnl ;# do \\n subst
} elseif {[my lookahead 2] eq "\\\\"} {
my next
my next
} elseif {[my lookahead 2] eq "\\\{"} {
my next
my next
} elseif {[my lookahead 2] eq "\\\}"} {
my next
my next
} else {
my next
}
}
default {
my next
}
}
}
error "unmatched brace '[my consume]'"
}
method skipws {} {
while {[string is space -strict [string index $rest 0]]} {
my skip
}
}
method eocommand {eoc} {
return [expr {[my lookahead] in $eoc}]
}
# skipcws - skip whitespace terminating on end of command
method skipcws {eoc} {
while {[string is space -strict [string index $rest 0]]} {
if {[my eocommand $eoc]} {
return -code break "end of command"
}
my skip
}
}
# skipclass - skip and return a contiguous prefix matching a class
method skipclass {cc {end end}} {
string is $cc -strict -failindex fail [string range $rest 0 $end]
if {[info exists fail]} {
set result [string range $rest 0 $fail-1]
my skip $fail
} else {
# the entire $rest matches class
set result [string range $rest 0 end]
my skip [string length $result]
}
return $result
}
# char - gather a character with backslash substitution
method char {} {
set escaped 0
if {[my lookahead] eq "\\"} {
my skip
set escaped 1
switch -exact -- [my lookahead] {
a - b - f - n - r - t - v {
my skip; my inject [subst \\[my lookahead]]
}
\n {
my skip; my bsnl; set next " "
}
\\ {
my skip; my inject [set next \\]
}
x {
my skip; ;# skip the literal "x"
set hex [my skipclass xdigit]
set hex [string index $hex end-1][string index $hex end]
my inject [set next [binary decode hex $hex]]
}
u {
my skip
string is xdigit -strict -failindex hexl $rest
if {![info exists hexl] || $hexl > 4} {
set hexl 4
}
set hex [my skip $hexl]
my inject [set next [subst \\$hex]]
unset hexl
}
default {
my next
}
}
} else {
my next
}
return $next
}
method bracket {} {
set commands {}
set eoc {\n ; "" \]}
while {!$done && ![my eocommand $eoc]} {
set command [my command $eoc]
if {[llength $command]} {
lappend commands $command
}
if {[my skip] eq "\]"} {
break
}
}
if {[llength $commands] == 1} {
return [lindex $commands 0]
} else {
return [list script $commands]
}
}
method var {} {
# ${name} form
if {[my lookahead] eq "\{"} {
while {!$done && [my char] != "\}"} {}
set var [string range [my consume] 0 end-1]
return [list var $var]
}
# $name or $name(index) form
set var ""
while {!$done &&
[string is alnum -strict [set ch [my char]]]
|| $ch eq ":"
|| $ch eq "_"} {
append var $ch
}
if {$ch ne "("} {
my pushback $ch
my consume
return [list var $var]
}
set index ""
while (!$done) {
if {[set ch [my char]] eq ")" && !$escaped} break
append index $ch
}
my consume
return [list var $var $index]
}
method quote {} {
if {[my next] ne "\""} {
error "parsing quote without quote"
}
my consume
set result {}
while {!$done} {
# accumulate a word character
switch -exact -- [my char] {
\" {
# got close quote
set accum [string range $accum 0 end-1]
if {$accum ne ""} {
lappend result [list literal $accum]
my consume
}
break
}
\[ {
if {$accum ne "\["} {
lappend result [list literal [string range [my consume] 0 end-1]]
} else {
my consume ;# delete open bracket
}
lappend result [my bracket]
}
\$ {
if {$accum ne "\$"} {
lappend result [list literal [string range [my consume] 0 end-1]]
}
lappend result [my var]
}
default {
}
}
}
if {$done} {
error "no close quote"
}
if {[llength $result] == 1} {
return [lindex $result 0]
} else {
return [list qcompound {*}$result]
}
}
method word {eoc} {
set result {}
while {!$done
&& [set la [my lookahead]] ni $eoc
&& ![string is space -strict $la]
} {
# accumulate a word character
switch -exact -- [set ch [my char]] {
\[ {
if {$accum ne "\["} {
lappend result [list literal [string range [my consume] 0 end-1]]
} else {
my consume ;# delete open bracket
}
lappend result [my bracket]
}
\$ {
if {$accum ne "\$"} {
lappend result [list literal [string range [my consume] 0 end-1]]
}
lappend result [my var]
}
default {
}
}
}
if {$accum ne ""} {
lappend result [list literal [my consume]]
}
if {[llength $result] == 1} {
return [lindex $result 0]
} else {
return [list compound {*}$result]
}
}
method cword {eoc} {
try {
my skipcws $eoc ;# skip whitespace and break on end of command
} on break {e eo} {
# end of command
return -code break "end of command"
}
if {[my lookahead 3] eq "{*}"} {
tailcall my goatse $eoc
}
switch -exact -- [my lookahead] {
\{ {
tailcall my brace
}
\" {
tailcall my quote
}
default {
tailcall my word $eoc
}
}
}
method command {{eoc {\n ; ""}}} {
# skip leading comments
my skipws ;# skip plain whitespace
while {[my lookahead] eq "#"} {
my comment
my skipws ;# skip all whitespace
}
if {$done} {
return {}
}
set result {}
while {!$done && ![my eocommand $eoc]} {
lappend result [my cword $eoc]
}
if {[llength $result]} {
return [list command {*}$result]
} else {
return {}
}
}
method parse {script} {
set rest $script
while {!$done} {
lappend commands [my command]
my skip ;# consume the end of command character
}
return [list script {*}$commands]
}
method reset {} {
set done 0
set endc ""
set escaped 0
catch {unset rest}
catch {unset next}
catch {unset accum}
catch {unset count}
}
constructor {args} {
my reset
}
}
oo::class create Interpreter {
method compound {args} {
set result {}
foreach el $args {
append result [my {*}$el]
}
return $result
}
method qcompound {args} {
return \"[my compound {*}$args]\"
}
method goatse {el} {
return \{*\}[my {*}$el]
}
method var {var args} {
if {[llength $args]} {
return \$${var}([lindex $args 0])
} else {
return \$$var
}
}
method brace {el} {
return \{$el\}
}
method quote {el} {
return \"$el\"
}
method literal {el} {
return $el
}
method command {args} {
foreach el $args {
lappend command [my {*}$el]
}
set caller [lindex [info level -1] 1]
if {$caller eq "script"} {
return [join $command]
} else {
return \[[join $command]\]
}
}
method script {args} {
foreach command $args {
if {![llength $command]} continue
lappend commands [my {*}$command]
}
return [join $commands \;]
}
}
if {0} {
set tclp [Parser new]
set script [$tclp parse [read stdin]]
puts $script
interpreter create I
puts [I {*}$script]
}
if {0} {
#set test "quote[test]passed"
#set test word[test]passed
#set test test$test(that)those
set test [list {*}$moop]
}
Parser create mp
Interpreter create mi
foreach m [info class methods Parser] {
mp reset
set parsed [mp parse [lindex [info class definition Parser $m] 1]]
puts "$m: [mi {*}$parsed]"
}