wdb This tool translates XML elements to Tcl
dicts where the keys are element, attribute, and content. Key element carries name such as rss; key attribute carries attributes such as href or id; key contents list of contained elements or dicts with key cdata and value text.
- procedure xmlToDict (src) transforms src to dict structure.
- procedure getElementByName (dict name) returns first contained element named name.
- procedure getElementById (dict id) returns element of dict with attribute id.
- procedure getAllElementsByName (dict name) returns all contained elements named name.
Currently no write procedures. License
OLL like always.
package provide XmlDict 0.1
namespace eval XmlDict {
namespace import ::tcl::mathop::+ ::tcl::mathop::-
namespace export get* xml*
}
proc ::XmlDict::splitByPattern {src {pat {<!\[CDATA\[(?!\]\]>).*?\]\]>}}} {
set ranges [regexp -inline -indices -all $pat $src]
if {$ranges eq ""} then {
set src
} else {
lappend result0 [string range $src 0 [- [lindex $ranges 0 0] 1]
foreach\
range0 [lrange $ranges 0 end-1]\
range1 [lrange $ranges 1 end] {
lappend result0 [string range $src {*}$range0]
lassign $range0 - a
lassign $range1 b c
lappend result0 [string range $src [+ $a 1] [- $b 1]]
}
lappend result0 [string range $src {*}[lindex $ranges end]]\
[string range $src [+ [lindex $ranges end end] 1] end]
foreach str $result0 {
if {$str ne ""} then {
lappend result $str
}
}
lappend result
}
}
proc ::XmlDict::splitByMarkup src {
set src [regsub -all {<{1,1}?!--.*?-->} $src ""]
set parts [splitByPattern $src]
foreach part $parts {
if {[string match {<!\[CDATA\[*\]\]>} $part]} then {
lappend result $part
} else {
lappend result {*}[splitByPattern $part {<[^>]+>}]
}
}
lappend result
}
proc ::XmlDict::attributes tag {
set atts0 [regexp -inline -all {\S+="[^"]*"} $tag]
set atts1 [regexp -inline -all {\S+='[^']*'} $tag]
lappend result
foreach attStr $atts0 {
regexp {^(\S+)="([^"]+)"$} $attStr - tag val
lappend result $tag $val
}
foreach attStr $atts1 {
regexp {^(\S+)='([^']+)'$} $attStr - tag val
lappend result $tag $val
}
set result
}
proc ::XmlDict::tokensDump src {
lappend result
foreach part [splitByMarkup $src] {
if {![string match <* $part]} then {
lappend result [list cdata $part]
} elseif {[regexp {^<!\[CDATA\[(.*)\]\]>$} $part - cdata]} then {
lappend result [list cdata [string map {& & < < > >} $cdata]]
} elseif {[string match </* $part]} then {
lappend result close
} elseif {[regexp {<([:alnum:]]+)[^>]*?>} $part - name]} then {
lappend result [list element $name atts [attributes $part]]
if {[string match */> $part]} then {
lappend result close
}
}
}
set result
}
proc ::XmlDict::dumpToList dump {
lappend result
while {[llength $dump] > 0} {
set dump [lassign $dump first]
switch -exact -- [lindex $first 0] {
element {
set name [dict get $first element]
set atts [dict get $first atts]
lassign [dumpToList $dump] dump cont
lappend result [list element $name attribute $atts content $cont]
}
cdata {
lappend result $first
}
close {
return [list $dump $result]
}
}
}
list $result $dump
}
proc ::XmlDict::xmlToDict xmlSrc {
lassign [dumpToList [tokensDump $xmlSrc]] dump
while {[llength $dump] > 0 &&
[lindex $dump 0 0] ne "element"} {
set dump [lrange $dump 1 end]
}
lindex $dump 0
}
proc ::XmlDict::childElements tree {
if {[dict exists $tree content]} then {
dict get $tree content
}
}
proc ::XmlDict::getElementByName {tree name} {
if {[dict exists $tree element] &&
[string match $name [dict get $tree element]]} then {
set tree
} else {
foreach child [childElements $tree] {
set result [getElementByName $child $name]
if {$result ne ""} then {
return $result
}
}
}
}
proc ::XmlDict::getElementById {tree id} {
if {[dict exists $tree attribute id] &&
[string match $id [dict get $tree attribute id]]} then {
return $tree
} else {
foreach child [childElements $tree] {
set result [getElementById $child $id]
if {$result ne ""} then {
return $result
}
}
}
}
proc ::XmlDict::getAllElementsByName {tree name} {
lappend result
if {[dict exists $tree element] &&
[string match $name [dict get $tree element]]} then {
lappend result $tree
}
foreach el [childElements $tree] {
lappend result {*}[getAllElementsByName $el $name]
}
set result
}
proc ::XmlDict::decodeEntities cdata {
set entities\
[lsort -unique [regexp -inline -all {&[^;]*;} $cdata]
lappend map """ \u0022 {*}{& & ' ' < <; > >}
foreach entity $entities {
if {[regexp {&#([:digit:]]+);} $entity - dec]} then {
lappend map $entity [format %c $dec]
} elseif {[regexp {&#x([:xdigit:]]+);} $entity - hex]} then {
set dec [scan $hex %x]
lappend map $entity [format %c $dec]
}
}
string map $map $cdata
}
proc ::XmlDict::getCdataFromElement tree {
set result ""
foreach el [dict get $tree content] {
if {[dict exists $el cdata]} then {
append result [decodeEntities [dict get $el cdata]]
} else {
append result [getCdataFromElement $el]
}
}
set result
}
namespace import ::XmlDict::*