# Google image search slideshow
# This is a simple app that screen-scrapes Google image search results
# and creates a slideshow of the results. Uses Tcl and Tk.

# User-settable parameters.
# Image size variable, for example:
#   all:              small%7Cmedium%7Clarge%7Cxlarge
#   medium and large: medium%7Clarge%7Cxlarge
#   xlarge:           xxlarge
set sizevar "small%7Cmedium%7Clarge%7Cxlarge"

# numlines (or deepnumlines) x 20 = number of images, because there are 20 images per result page.
# numlines is the maximum number of image search result pages.
set numlines 8 ;# cap on search
# deepnumlines is the maximum number of result pages read from an individual site when that site has multiple images.
set deepnumlines 8 ;# cap on deep search
# max images
set imagecap 500

# load packages
package require http
package require jpeg
package require base64 ;# in tcllib, part of ActiveTcl
package require img::jpeg

wm title . "google viewer"

# create the entry box
set searchvar ""
#labelframe .entryfr -text "Type Search Value And Hit Enter"
labelframe .entryfr -text "Search Value"
entry .entryfr.entrybox -textvariable searchvar
pack .entryfr .entryfr.entrybox -padx 5 -pady 5
bind . <KeyPress-Return> { destroy .entryfr }
tkwait window .entryfr

# exit if the user closed the window without entering a search
if { $searchvar == "" } exit

# show the console for debugging
catch {console show}

# bump the minimum size of the window up
wm minsize . 800 600

# image set-up
image create photo imgarray(0) -data "R0lGODlhAQABAIAAAP///wAAACwAAAAAAQABAAACAkQBADs="
set imgurls(0) "no images loaded"
set currimg 0

# gui frames
labelframe .cfr -text "controls"
labelframe .ifr -text "image"
pack .cfr .ifr -side top -fill both

# gui controls
button .cfr.prev -text "<" -command { previmage }
button .cfr.next -text ">" -command { nextimage }
label .cfr.lb1 -text "Search:"
label .cfr.searchtxt -textvariable searchvar
label .cfr.lb2 -text " Image:"
label .cfr.imgoftxt -text "0"
label .cfr.lb3 -text "of"
label .cfr.imgcounttxt -text "0"
label .cfr.lb4 -text " url:"
label .cfr.imgurltxt -text "loading"
pack .cfr.prev .cfr.next .cfr.lb1 .cfr.searchtxt .cfr.lb2 .cfr.imgoftxt .cfr.lb3 .cfr.imgcounttxt .cfr.lb4 .cfr.imgurltxt -side left

# gui image
label .ifr.gimg -image imgarray($currimg) -bd 1 -relief sunken
pack .ifr.gimg -side left

# keyboard bindings
bind . <KeyPress-Right> nextimage
bind . <KeyPress-Left> previmage
bind . <KeyPress-Escape> exit
bind . <KeyPress-space> exit

# pretend to be an old browser so the site serves plain HTML rather than JavaScript-driven pages
::http::config -accept "*/*"
::http::config -useragent "Mozilla/2.0 (compatible; MSIE 1.0; Windows NT 5.0)"

# functions for buttons - next image
proc nextimage { } {
    global currimg
    global imgurls
    set currimg [expr $currimg+1]
    if { [catch { .ifr.gimg configure -image imgarray($currimg) } msg] } {
        # must be at the end of the array
        puts $msg
        set currimg [expr $currimg-1]
    } else {
        # must be ok
        .cfr.imgoftxt configure -text $currimg
        .cfr.imgurltxt configure -text $imgurls($currimg)
    }
}

# functions for buttons - previous image
proc previmage { } {
    global currimg
    global imgurls
    if { $currimg != 0 } {
        set currimg [expr $currimg-1]
        .ifr.gimg configure -image imgarray($currimg)
        .cfr.imgoftxt configure -text $currimg
        .cfr.imgurltxt configure -text $imgurls($currimg)
    }
}

# save data from a URL to a variable
proc httptovar { url } {
    # strip invalid characters from the URL
    set url [ string map { "\"" "" " " "+" } $url ]
    puts "URL: $url"
    if { [catch { set readpage [http::data [::http::geturl $url -timeout 10000 ] ] } msg] } {
        puts "http error - $msg"
        return "error - $msg"
    } else {
        puts "http ok, url: $url"
        return $readpage
    }
}

# get image links from a results page by screen-scraping
proc getlinksfromvar { firstpage } {
    set startchar 0
    set linklist ""
    # loop while there are still occurrences of the marker string in firstpage
    while {[string first "<a href=/imgres?imgurl=" $firstpage $startchar] != -1} {
        # skip past the marker to find the start of the image URL
        set startchar [ expr [string first "/imgres?imgurl=" $firstpage $startchar] + 15 ]
        # find the end of the image URL
        set endchar [ expr [string first "&imgrefurl=" $firstpage $startchar] - 1 ]
        # add it to the list
        lappend linklist "[ string range $firstpage $startchar $endchar ]"
        incr startchar
    }
    return $linklist
}

# build the search string to pass in and print it for debugging
set searchstr "http://www.google.com/images?as_q=[ format %s $searchvar ]&svnum=10&hl=en&c2coff=1&btnG=Google+Search&as_epq=&as_oq=&as_eq=&imgsz=[ format %s $sizevar ]&as_filetype=&imgc=&as_sitesearch=&safe=off"
puts "searchstr: $searchstr"

# seed value, just so the variable exists
set firstpage [ httptovar "http://www.google.com" ]

# screen-scrape the image search results html, one page at a time
set lineCount 0
while { $lineCount < $numlines } {
    puts "$searchstr&start=[ expr $lineCount * 20 ]"
    set newpage [ httptovar $searchstr&start=[ expr $lineCount * 20 ] ]
    if { [string first ">Next<" $newpage] == -1 } {
        puts "no more links"
        # so break out of this loop
        set lineCount $numlines
    } else {
        set firstpage "$firstpage $newpage"
        incr lineCount
    }
}

set deepfirstpage ""
set startchar 0
set deeplinecount 0
# loop through each per-site "deep" link while there are still occurrences of the marker string in firstpage
while {[string first " <a class=fl href=" $firstpage $startchar] != -1} {
    # skip past the marker to find the start of the link
    set startchar [ expr [string first " <a class=fl href=" $firstpage $startchar] + 25 ]
    # find the end of the link
    set endchar [ expr [string first ">" $firstpage $startchar] - 1 ]
    # for each google page of deep results, add the html to deepfirstpage
    while { $deeplinecount < $deepnumlines } {
        # read the html of the google page results
        set deepnewpage [ httptovar http://images.google.com/[ string range $firstpage $startchar $endchar ]%22&start=[ expr $deeplinecount * 20 ] ]
        if { [string first ">Next<" $deepnewpage] == -1 } {
            puts "no more links"
            # so break out of this loop
            set deeplinecount $deepnumlines
        } else {
            # append it to deepfirstpage
            set deepfirstpage "$deepfirstpage $deepnewpage"
            incr deeplinecount
        }
    }
    # move on to the next deep link
    incr startchar
    set deeplinecount 0
}

# read all the image locations from the html text
set links [ getlinksfromvar $firstpage ]
set deeplinks [ getlinksfromvar $deepfirstpage ]

# print standard links data
puts "\nl links: [llength $links]"
puts "\nl deeplinks: [llength $deeplinks]"

# merge the links together
set links "$links $deeplinks"
puts "\nboth links: [llength $links]"

set valcount "0"
set imgproced "0"
foreach val $links {
    # only process up to imagecap images
    if { $imgproced < $imagecap } {
        puts "\nLink is (number $valcount): $val"
        if { [catch {image create photo imgarray($valcount) -data [ httptovar $val ]} msg] } {
            # error reading image
            puts "error: $msg"
        } else {
            # image read ok
            .cfr.imgcounttxt configure -text $valcount
            set imgurls($valcount) $val
            set valcount [ expr $valcount + 1 ]
        }
        # count the total read, right or wrong
        incr imgproced
    }
}
puts "read $valcount of $imgproced read correctly"
puts "Done"
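As an aside, the index arithmetic in getlinksfromvar could also be done with a single regexp. The hypothetical getlinksfromvar_re below is only a sketch: it assumes the same /imgres?imgurl=...&imgrefurl= markup that the proc above expects, which Google may well have changed since this was written.

# Hypothetical regexp-based alternative to getlinksfromvar: pull every
# imgurl= value out of the page in one pass.
proc getlinksfromvar_re { page } {
    set linklist {}
    foreach {match url} [regexp -all -inline {<a href=/imgres\?imgurl=([^&]+)&imgrefurl=} $page] {
        lappend linklist $url
    }
    return $linklist
}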
[Robin] - 2009-06-20 15:48:38

I tried it with ActiveTcl and XP and had to comment out "package require jpeg", which was giving me an error. Could you modify it to display an infinite stream of images, say every 2 seconds? I tried a bastardized version of that, just going back and forth among the fixed set of images, by adding

set x 3
set rincrement 1
while { $x==$x } {
    puts "\nl lets wait 300"
    after 300
    if { $x==12 } { set rincrement -1 }
    if { $x==0 } { set rincrement 1 }
    incr x $rincrement
    if { $rincrement == 1 } {
        puts "\n next image"
        nextimage
    } else {
        puts "\n previous image"
        previmage
    }
}

at the end of your code, but it causes the whole thing to go unresponsive.

RFox - April 9, 2013

It might be interesting to recast this application using the Google Custom Search API: https://developers.google.com/custom-search/v1/using_rest Of course then we'd need an API key somewhere. That would remove the need to scrape the HTML... just pull the images out of the JSON result(s).
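Regarding Robin's question: the unresponsiveness comes from the blocking loop. "after 300" with no script argument simply sleeps inside the while, so control never returns to the Tk event loop and the window cannot redraw or react. A minimal non-blocking sketch, building on the nextimage/previmage procs above (slideshowtick and slideshowdir are invented names, not part of the original script), reschedules itself with "after" instead:

# A non-blocking slideshow tick: advance one image, then re-arm with "after"
# so the event loop keeps running. slideshowtick and slideshowdir are
# invented names, not part of the original script.
set slideshowdir 1 ;# 1 = forward, -1 = backward

proc slideshowtick { } {
    global currimg slideshowdir
    set before $currimg
    if { $slideshowdir == 1 } { nextimage } else { previmage }
    # nextimage/previmage leave currimg unchanged at either end, so reverse there
    if { $currimg == $before } {
        set slideshowdir [expr {-$slideshowdir}]
    }
    # show another image in 2000 ms without blocking the GUI
    after 2000 slideshowtick
}

# start the slideshow once the images have been loaded
after 2000 slideshowtick

Because each tick returns immediately, the buttons, keyboard bindings, and window resizing keep working while the slideshow runs.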
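Following up on the Custom Search idea: a rough, untested sketch of what the JSON route might look like, assuming tcllib's json package and the tls package for https. imagelinksfromapi is an invented helper, and YOUR_API_KEY / YOUR_CX are placeholders for the API key and search-engine id the service requires. The returned links could then be fed to the same download loop used above.

package require http
package require tls
package require json
::http::register https 443 ::tls::socket

# Query the Custom Search JSON API for image results and return the image URLs.
# Field names (items, link) follow the API documentation linked above.
proc imagelinksfromapi { query } {
    set qs [::http::formatQuery key YOUR_API_KEY cx YOUR_CX searchType image q $query]
    set tok [::http::geturl "https://www.googleapis.com/customsearch/v1?$qs" -timeout 10000]
    set result [::json::json2dict [::http::data $tok]]
    ::http::cleanup $tok
    set links {}
    if { [dict exists $result items] } {
        foreach item [dict get $result items] {
            lappend links [dict get $item link]
        }
    }
    return $links
}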