DESCRIPTIONA
snit for working with the Gene Ontology framework [
1]. Candidate for including into the coming
biotcl package.
# Author : Dr. Detlef Groth, MPIMG Berlin
# Created By : Dr. Detlef Groth
# Created : 2005-02-21
# Last Modified : <050222.0600>
#
# Description
package require snit 0.97
package require oomk
snit::type GeneOntol {
# public options
option -obofile -default "" -configuremethod SetObofile
option -mkfile ""
# private variables
variable go
variable db
# views
variable pvNames
variable pvParents
variable pvAltids
variable pvObsoletes
variable GETTREE
# `GeneOntol gon -obofile obofile ?-mkfile metakitfile?' --
# constructor for the GeneOntol type
# Arguments:
# `-obofile filename' the Gene Ontology obofile
# `?-mkfile?' the metakit databasefile, defaults to obofile.mk
# -----------------------------------------------
constructor {args} {
$self configurelist $args
if {$options(-mkfile) eq ""} {
set options(-mkfile) $options(-obofile).mk
}
$self Init
}
destructor {
if {[info exists db]} {
$db close
}
}
# public methods (are lowercase)
# `gon' isAlt --
# info if an given GO-id is an alternative id
# Arguments:
# `id' a GO-id
# Returns:
# true if id is an alternative GO-id or false if it is not
# ------------------------------------------------------------
method isAlt {id} {
[$pvAltids select -exact altid $id -count 1] as pSel
if {[$pSel size] > 0} {
return true
} else {
return false
}
}
# `gon' alt2id --
# getting an the right GO-id for an alternative one
# Arguments:
# `id' a GO id
# Returns:
# returns the right GO-id for an alternative one
# ------------------------------------------------------------
method alt2id {id} {
[$pvAltids select -exact altid $id -count 1] as pSel
if {[$pSel size] > 0} {
return [$pSel get 0 id]
} else {
return ""
}
}
# `gon' isObsolete --
# info if an GO-id is an older obsolete GO-id
# Arguments:
# `id' a GO id
# Returns:
# true if id is an obsolete GO-id, false if it its not
# ------------------------------------------------------------
method isObsolete {id} {
[$pvObsoletes select -exact id $id -count 1] as pSel
if {[$pSel size] > 0} {
return true
} else {
return false
}
}
# `gon' parents --
# getting a list of parents for a certain GO-id
# Arguments:
# `id' a GO-id
# Returns:
# a list of lists with the keys parent, relation and id foreach listitem
# ------------------------------------------------------------
method parents {id} {
set parents [list]
[$pvParents select -exact id $id] as pSel
$pSel loop c {
lappend parents [array get c]
}
return $parents
};
# `gon' children --
# getting a list of children for a certain GO-id
# Arguments:
# `id' a GO-id
# Returns:
# a list of lists with the keys parent, relation and id foreach listitem
# ------------------------------------------------------------
method children {id} {
set children [list]
if {[$self isAlt $id]} {
set id [$self alt id]
}
[$pvParents select -exact parent $id] as pSel
$pSel loop c {
lappend children [array get c]
}
return $children
}
# `gon' name --
# getting the name for a GO-id
# Arguments:
# `id' a GO-id
# Returns:
# the name of the GO-id, or an empty string if no valid GO-id
# ------------------------------------------------------------
method name {id} {
if {[$self isAlt $id]} {
set id [$self alt $id]
}
[$pvNames select -count 1 -exact id $id] as pSel
if {[$pSel size] > 0} {
return [$pSel get 0 name]
} else {
return ""
}
}
# `gon' id --
# getting the id for a GO-term
# Arguments:
# `name' a GO-term
# Returns:
# the GO-id for the given term
# ------------------------------------------------------------
method id {name} {
[$pvNames select -count 1 -exact name $name] as pSel
if {[$pSel size] > 0} {
return [$pSel get 0 id]
} else {
return ""
}
}
# `gon' like --
# performs a glob style pattern search against all GO-terms
# Arguments:
# `pattern' a glob style pattern
# Returns:
# a list of lists with the keys id,name foreach matching term
# ------------------------------------------------------------
method like {pattern} {
set res [list]
[$pvNames select -glob name $pattern] as pSel
$pSel loop c {
lappend res [array get c]
}
return $res
}
# `gon' getTree --
# method to get all GO-ids forming a full topdown tree for a certain
# GO-id
# Arguments:
# `id' a GO-id
# Returns:
# a list of GO-ids, which constructs the full tree
# ------------------------------------------------------------
method getTree {id {stack 0}} {
if {$stack == 0} { array unset GETTREE }
incr stack
set GETTREE($id) 1
if {![lsearch [list GO:0003674 GO:0005575 GO:0008150] $id] >= 0} {
foreach par [$self parents $id] {
array set c $par
$self getTree $c(parent) $stack
}
}
return [array names GETTREE]
}
method getDefinition {id} {
error "method getDefinition not yet implemented"
# use an index for the obofile
# (index done inside Init)
# jump via seek inside obofile
# extract the definition
}
method getInfo {} {
error "method getInfo not yet implemented"
# inside the mkfile during Init
# collect number of MF,BP,CC terms
# ua
}
# private methods (are uppercase)
method SetObofile {option value} {
if {$options($option) ne ""} {
error "option $option can only be set a object creation time"
}
set options($option) $value
#$self Init
}
method Init {} {
if {[info exists db]} {
$db close
}
if {[file exists "$options(-mkfile)"]} {
puts stderr filexists
set db [mkstorage %AUTO% $options(-mkfile)]
[$db view names] as pvNames
[$db view parents] as pvParents
[$db view altids] as pvAltids
[$db view obsoletes] as pvObsoletes
return
}
puts stderr indexcreation
set prog [Progress %AUTO% -file $options(-obofile)]
set db [mkstorage %AUTO% $options(-mkfile)]
$db layout names {id name}
$db layout altids {id altid}
$db layout parents {id relation parent}
$db layout obsoletes {id}
[$db view names] as pvNames
[$db view parents] as pvParents
[$db view altids] as pvAltids
[$db view obsoletes] as pvObsoletes
set x 0
set id ""
if [catch {open $options(-obofile) r} infh] {
error "Cannot open $options(-obofile) : $infh"
}
while {[gets $infh line] >= 0} {
if {[incr x] % 500 == 0} { $prog progress [tell $infh] }
if {[regexp {^id: *(GO:[0-9]{7})} $line -> id]} {
#puts stderr $id
} elseif {[regexp {^alt_id: *(GO:[0-9]{7})} $line -> alt]} {
$pvAltids append id $id altid $alt
} elseif {[regexp {^name: *(.+)} $line -> go(name,$id)]} {
$pvNames append id $id name $go(name,$id)
} elseif {[regexp {^(is_a:|relationship: part_of) *(GO:[0-9]{7})} $line -> kind goid]} {
regsub {relationship: } $kind "" kind
$pvParents append id $id relation $kind parent $goid
} elseif {[regexp {^is_obsolete: *true} $line ]} {
$pvObsoletes append id $id
}
}
$db commit
}
}
# test
proc test {} {
source [file join [file dirname [info script]] Progress.tcl]
set datadir e:/links/project/goblet/data/
foreach version {2004-05-01 2004-09-01 2005-02-01} {
set t [clock seconds]
set gon [GeneOntol %AUTO% -obofile $datadir/goblet-databases/gene_ontology.obo.$version]
puts stderr "Loading version $version in [expr [clock seconds] - $t] seconds"
puts [$gon parents GO:0003720]
puts [$gon children GO:0003674]
puts [$gon name GO:0003674]
foreach go {GO:0003674 GO:0003720} {
puts "Children of $go :"
foreach child [$gon children $go] {
array set c $child
set name [$gon name $c(id)]
puts " $c(id) $go $name [$gon id $name]"
}
puts stderr "[expr [clock seconds] - $t] seconds"
}
foreach go [$gon like membrane*] {
puts stderr " pattern $go [$gon name $go]"
}
puts stderr [$gon name GO:0003674]
puts stderr [$gon id [$gon name GO:0003674]]
puts stderr [$gon children GO:0003674]
puts stderr [$gon name GO:0048201]
puts stderr [$gon id [$gon name GO:0048201]]
puts stderr [$gon parents GO:0048201]
puts stderr "obsolet: GO:0000211 [$gon name GO:0000211]"
puts stderr [$gon parents GO:0000211]
puts stderr "obsolet-children: GO:0000211"
puts stderr [$gon children GO:0000211]
puts stderr "is_alt GO:0004752"
puts stderr [$gon name GO:0004752]
puts stderr [$gon parents GO:0004752]
puts stderr "is_alt children GO:0004752"
puts stderr [$gon children GO:0004752]
puts stderr "[expr [clock seconds] - $t] seconds"
puts stderr "A tree for GO:0048201"
puts stderr [$gon getTree GO:0048201]
}
}