% Tree mytree ::mytreeInsert some nodes
% mytree insert root 0 domNode4 % mytree insert root 0 domNode5 % mytree insert root 0 domNode6 % mytree children root domNode6 domNode5 domNode4 % mytree insert domNode4 0 domNode7 % mytree domDoc domDoc1 % [mytree domDoc] documentElement domNode2Perform an xpath query for all the leaves in the tree
% [[mytree domDoc] documentElement] selectNodes {//*[count(child::*)=0]} domNode5 domNode6 domNode7}
# Partial wrapping of tDOM functionality with tcllib tree interface package require tdom package require XOTcl namespace import xotcl::* Class Tree Tree instproc init {{existingDomDoc ""}} { [self] instvar domDoc if {[string length $existingDomDoc] == 0} { set domDoc [dom createDocument node] } else { set domDoc $existingDomDoc } [self] parametercmd domDoc next } Tree instproc insertNode {parent idx child} { # tcllib tree indexing starts at zero, but DOM starts at one incr idx set sibling [$parent child $idx] if {[string length $sibling] == 0} { return [$parent appendChild $child] } else { return [$parent insertBefore $child $sibling] } } # tcllib tree allows an optional third argument to specify the # name of the node. tDOM doesn't allow node names to be specified # The easy way out--don't allow the third argument Tree instproc insert {parent idx} { [self] instvar domDoc # Create the node set newNode [$domDoc createElement node] # Add the node to the tree [self] insertNode $parent $idx $newNode } Tree instproc move {newParent idx nodeToMove args} { foreach node [concat $nodeToMove $args] { set oldParent [$node parentNode] $oldParent removeChild $node [self] insertNode $newParent $idx $node incr idx } } Tree instproc keyexists {node -key key} { $node hasAttribute $key } Tree instproc keys {node} { $node attributes } Tree instproc depth {node} { return [llength [$node ancestor all]] } Tree instproc size {node} { return [llength [$node descendant all]] } Tree instproc isleaf {node} { return [expr [llength [$node descendant all]] == 0] } # This seems correct, but isn't giving me what I expect Tree instproc index {node} { return [llength [$node psibling all]] } # Try this instead Tree instproc index {node} { return [lsearch [[self] children [[self] parent $node]] $node] } Tree instproc numchildren {node} { return [llength [$node child all]] } Tree instproc set {node args} { switch [llength $args] { 0 { $node getAttribute data } 1 { $node setAttribute data [lindex $args 0] $node getAttribute data } 2 { $node getAttribute [lindex $args 1] } 3 { set switch [lindex $args 0] set key [lindex $args 1] set value [lindex $args 2] $node setAttribute $key $value $node getAttribute $key } default {error "wrong number of arguments"} } } # Dynamically construct the simple command that have only a node argument foreach {treeCmd domCmd} {children childNodes parent parentNode previous previousSibling next nextSibling delete delete} { Tree instproc $treeCmd {node} "\$node $domCmd" } Tree instproc destroy {args} { [self] instvar domDoc $domDoc delete next } # The tcllib interface always names the root node "root" # tDOM doesn't have a way to specify node names. Therefore, # install this filter to automatically convert "root" to the # actual root element Tree instproc convertRoot {node args} { [self] instvar domDoc # Convert input node from name "root" if {$node == "root"} { set node [$domDoc documentElement] } # Dispatch the method set retVal [eval next $node $args] # Convert output node to name "root" if {$retVal == [$domDoc documentElement]} { return root } else { return $retVal } } Tree instfilter convertRoot # Only call the filter for methods that have node as a first argument Tree instfilterguard convertRoot { ([lsearch {init destroy} [self calledproc]] < 0) && ([lsearch [Tree info instcommands] [self calledproc]] >= 0) }
LV So, what in version 2.0 or newer of struct::tree prevents the above from working - and is there a way to improve the code so that it would work in either version?
male - 2010-03-23 - I had the problem to parse very malformed html, not parsable by tDOM, so I decided to try tcllib's htmlparse package with the current [struct::tree] package. After parsing the html source text into the struct::tree, I convert the tree into a tDOM DOM to use XPaths to extract the needed information.Here the code usable with tcl 8.6 due to the usage of try/on/finally:
package require tdom; package require htmlparse; package require struct::tree; namespace eval html2dom { proc Attributes {data} { set attributes [dict create]; foreach {=> name value} [regexp -inline -all -- {(\w+)(?:=\"?([^\"]*))?\"?} $data] { dict set attributes $name [expr {$value eq "" ? $name : $value}]; } return $attributes; } proc Walk {tree parentNode node dom parentDomNode} { set type [$tree get $node type]; if {$parentNode eq $node} { set domNode $parentDomNode; } else { if {$type eq "PCDATA"} { set domNode [$dom createTextNode $type]; $domNode nodeValue [$tree get $node data]; $parentDomNode appendChild $domNode; return; } set domNode [$dom createElement $type]; $parentDomNode appendChild $domNode; } if {[$tree keyexists $node data]} { set data [$tree get $node data]; if {$data ne ""} { set attributes [Attributes $data]; if {$attributes ne ""} { $domNode setAttribute {*}$attributes; } } } foreach childNode [$tree children $node] { [lindex [info level 0] 0] $tree $node $childNode $dom $domNode; } return; } proc convert {htmlFileName} { try { set htmlData [tDOM::xmlReadFile $htmlFileName]; set tree [struct::tree]; htmlparse::2tree $htmlData $tree; htmlparse::removeVisualFluff $tree; htmlparse::removeFormDefs $tree; set dom [dom createDocument html]; set domDoc [$dom documentElement]; set treeRoot [$tree rootname]; Walk $tree $treeRoot $treeRoot $dom $domDoc; } on error {reason options} { if {[info exists tree]} { $tree destroy; } if {[info exists dom]} { $dom delete; } return -code error -options $options $reason; } finally { $tree destroy; } return $dom } namespace export -clear {[a-z]*}; namespace ensemble create; } set dom [html2dom convert $htmlFileName]; set domDoc [$dom documentElement]; set nodes [$domDoc selectNodes $xpath];