

rmax - 2004-11-10: Updated it to recognize the new HTML format of the site, and changed it to use http://pda.leo.org

package require http
package require htmlparse

# Command-line client for the LEO English/German dictionary (http://pda.leo.org).
# Usage: pass the search words as command-line arguments; the translations
# found are printed as a two-column table.
namespace eval ::dict.leo.org {
    # td        - array collecting the text of the current table row,
    #             indexed by the running cell number
    # table     - flat result list: english german english german ...
    # tdcounter - number of <td> tags seen so far in the current row
    variable td
    variable table ""
    variable tdcounter 0

    # parse --
    #   Callback handed to ::htmlparse::parse (see proc query).
    #
    #   tag     - name of the tag that was found
    #   close   - "/" for a closing tag, empty otherwise
    #   options - attributes of the tag (unused)
    #   body    - text following the tag
    #
    #   Text is accumulated per cell in td(); at the end of each row the
    #   contents of cells 2 and 3 are appended to the result table.
    proc parse {tag close options body} {
        variable td
        variable table
        variable tdcounter

        # NOTE: no comments inside the switch body - Tcl would read them
        # as patterns. Both spellings of each tag are listed because the
        # tag name may arrive in either case.
        switch -- $close$tag {
            /TR - /tr {
                if {[info exists td(2)] && [info exists td(3)]} {
                    lappend table [string trim $td(2)] [string trim $td(3)]
                }
                set tdcounter 0
                array unset td
            }
            TD - td {
                incr tdcounter
            }
            default {
                set item [htmlparse::mapEscapes $body]
                if {[string length $item]} {
                    append td($tdcounter) $item
                }
            }
        }
    }

    # query --
    #   Sends the search term to the site and returns the translations as
    #   a flat list of alternating english/german entries. Returns an empty
    #   list when the response contains no result line.
    proc query {query} {
        variable td
        variable table
        variable tdcounter

        set url http://pda.leo.org
        set query [http::formatQuery search $query]

        # Fetch the page and release the token right away; only the body
        # text is needed afterwards.
        set tok [::http::geturl $url -query $query]
        set html [::http::data $tok]
        ::http::cleanup $tok

        # The results are expected on the single line that carries both
        # column headings. Remember it explicitly so that a response
        # without such a line does not leave us with an unset or
        # unrelated loop variable.
        set resultLine ""
        foreach line [split $html "\n"] {
            if {[string match "*ENGLISCH*DEUTSCH*" $line]} {
                set resultLine $line
                break
            }
        }

        # Start from a clean parser state so that leftovers of an earlier
        # query cannot leak into this one.
        set table [list]
        set tdcounter 0
        array unset td

        if {[string length $resultLine]} {
            ::htmlparse::parse -cmd ::dict.leo.org::parse $resultLine
        }
        return $table
    }
}

# max --
#   Returns the larger of the two numbers a and b.
proc max {a b} {expr {$a > $b ? $a : $b}}

# main --
#   Joins the command-line words into one search term, queries the
#   dictionary and prints the result as two left-aligned columns.
proc main {argv} {
    set table [dict.leo.org::query [join $argv]]

    # Put the headings in first so that they take part in the width
    # calculation; otherwise short results would be misaligned.
    set table [linsert $table 0 " English" " Deutsch"]
    set max 0
    foreach c $table {set max [max $max [string length $c]]}

    # Separator row goes directly below the headings.
    set sep [string repeat - $max]
    set table [linsert $table 2 $sep $sep]

    foreach {c1 c2} $table {
        puts [format "%-*s %-*s" $max $c1 $max $c2]
    }
    puts ""
}

main $argv
RS: Proud owners of a firewall might have to add a line like
    http::config -proxyhost proxy -proxyport 80
at the very top of proc query. In my case this was what it took to actually get out through the firewall.
Category Internet | Web scraping | Using Tcl to write WWW client side applications |
---|