DKF, 2005-Feb-17Here is a
Tcl script I wrote to track resource usage. It is a wrapper around a program that observes how much time that program took to execute, and then it writes that information out somewhere relevant, together with other collected data (e.g. on the number of processors assigned to the program, who was running it, etc.) The output format is as an
XML document, according to the spec found originally at
http://www.gridforum.org/2003/ur-wg/urwg-schema.09.02.xsd though I think this document has moved subsequently. (Ah, the hazards of tracking a moving target!) In general Tcl terms, the interesting bit is probably the code to generate the XML, which is really quite neat and guarantees that everything is quoted correctly. The use of
TclX features might also be interesting.
License is BSD. Have fun with this code!
package require Tcl 8.4
package require Tclx 8.4
# Generate a "unique" number that is also an XML id; the following
# code produces a value that is highly unlikely to be duplicated by
# accident, and so is good enough :^)
set id [string map {- {}} [id host][id userid][pid][clock seconds][clock clicks]]
# ----------------------------------------------------------------------
set DATE_FMT "%a %b %e %H:%M:%S %Y" ;#Tue Mar 2 11:56:34 2004
set UR_DATE_FMT "%Y-%m-%dT%H:%M:%SZ" ;#2004-03-02T11:56:34Z
proc xmlEncode {string} {
#put quote in quotes to keep the wiki syntax highlighter happy
string map {< < > > & & "\"" " ' '} $string
}
proc element {name -> content args} {
upvar 1 chan chan inset inset
if {[set ->] ne "->"} {
error "missing ->"
}
if {![info exist inset]} {set inset ""}
set start $name
foreach {key val} $args {
append start " $key=\"" [xmlEncode $val] "\""
}
puts $chan "$inset<$start>[xmlEncode $content]</$name>"
}
proc structElement {name {opts {}} {body {}}} {
upvar 1 chan chan inset inset
if {[string length $opts] && ![string length $body]} {
set body $opts
set opts {}
}
set start $name
foreach {key val} $opts {
append start " $key=\"" [xmlEncode $val] "\""
}
if {![info exist inset]} {set inset ""}
puts $chan "$inset<$start>"
set oldInset $inset
append inset " "
uplevel 1 $body
set inset $oldInset
puts $chan "$inset</$name>"
}
# ----------------------------------------------------------------------
# The next two procedures are retained mostly for historical interest...
proc writeStartRecord {chan} {
global env id argv
set args [lassign $argv program]
structElement start_record "id $id" {
element program -> $program
element arguments -> $args
element jobdir -> [file normalize [pwd]]
element userid -> [id user]
element host -> [id host]
if {[info exist env(UNICORE_IDENTITY)]} {
element unicore_identity -> $env(UNICORE_IDENTITY)
}
}
flush $chan
}
proc writeFullRecord {chan exitCode} {
global DATE_FMT id env argv execPid startInfo endInfo caeflag
set args [lassign $argv program]
set start [clock format $startInfo(date) -format $DATE_FMT]
set end [clock format $endInfo(date) -format $DATE_FMT]
set user [expr {([lindex $endInfo(times) 2] - [lindex $startInfo(times) 2]) * 0.001}]
set sys [expr {([lindex $endInfo(times) 3] - [lindex $startInfo(times) 3]) * 0.001}]
structElement license_record "id $id" {
element program -> [auto_execok $program]
element caeflag -> $caeflag
element arguments -> $args
element jobdir -> [file normalize [pwd]]
if {[info exist env(UNICORE_IDENTITY)]} {
element unicore_identity -> $env(UNICORE_IDENTITY)
}
element userid -> [id user]
element gecos -> $env(NAME)
element host -> [id host]
element pid -> $execPid
element startdate -> $start
element enddate -> $end
element walltime -> [expr {$endInfo(date)-$startInfo(date)}]s
element usertime -> [format %.2fs $user]
element systemtime -> [format %.2fs $sys]
}
flush $chan
}
# ----------------------------------------------------------------------
proc ur:clock {time} {
global UR_DATE_FMT
clock format $time -format $UR_DATE_FMT -gmt true
}
proc writeUsageRecord {chan exitCode} {
global id env argv execPid startInfo endInfo caeflag
set jobID [lindex [split [file tail [pwd]] _] 1]
set args [lassign $argv program]
set wall [expr {$endInfo(date) - $startInfo(date)}]
set user [format %.3f [expr {
([lindex $endInfo(times) 2] - [lindex $startInfo(times) 2]) * 0.001
}]]
set sys [format %.3f [expr {
([lindex $endInfo(times) 3] - [lindex $startInfo(times) 3]) * 0.001
}]]
# Need to look up the right stuff!
structElement UsageRecord {
xmlns http://www.gridforum.org/2003/ur-wg
xmlns:urwg http://www.gridforum.org/2003/ur-wg
xmlns:xsi http://www.w3.org/2001/XMLSchema-instance
xmlns:ds http://www.w3.org/2000/09/xmldsig#
xmlns:ucrl http://www.unicore.org/ajo/resources/log
xmlns:egb http://www.eurogrid.org/billing
xsi:schemaLocation \
http://www.gridforum.org/2003/ur-wg/urwg-schema.09.02.xsd
} {
element RecordIdentity -> "" \
urwg:createDate [ur:clock [clock seconds]] \
urwg:recordId "urn:eurogrid:billing:$id"
structElement JobIdentity {
element GlobalJobId -> unicore_job://[info hostname]/$jobID
element LocalJobId -> $id
element ProcessId -> $execPid
}
structElement UserIdentity {
element LocalUserId -> [id user]
}
element WallDuration -> PT${wall}S
element CpuDuration -> PT${user}S usageType "user"
element CpuDuration -> PT${sys}S usageType "system"
element EndTime -> [ur:clock $endInfo(date)]
element StartTime -> [ur:clock $startInfo(date)]
element Status -> $exitCode urwg:description "exit code"
element Host -> [info hostname] primary "true"
if {[info exist env(UNICORE_JOB)] && $env(UNICORE_JOB)} {
element NodeCount -> $env(UC_NODES)
element Processors -> $env(UC_PROCESSORS)
}
element Resource -> [auto_execok $program] \
urwg:description "ucrl:executableName"
element Resource -> [file normalize [pwd]] \
urwg:description "ucrl:workingDirectory"
element Resource -> $args urwg:description "ucrl:arguments"
# This field is really not useful to anyone!
#element Resource -> $caeflag urwg:description "egb:caeflag"
}
flush $chan
}
# ----------------------------------------------------------------------
proc main {} {
global serverHost serverPort startInfo execPid endInfo argv caeflag
# Record that we are about to start executing...
set startInfo(date) [clock seconds]
set startInfo(times) [times]
set executable [lindex [auto_execok [lindex $argv 0]] 0]
if {![file executable $executable]} {
puts stderr "cannot find executable: [lindex $argv 0]"
exit 2
}
# Start the execution of the monitored program
set myArgv0 [lindex $argv 0]
set myArgs [lrange $argv 1 end]
set program [auto_execok $myArgv0]
if {[set execPid [fork]] == 0} {
# Run the program
execl -argv0 $myArgv0 $program $myArgs
puts stderr "failed to execute $myArgv0: $msg"
exit 2
}
# Wait for the monitored program to finish
set code [lindex [wait $execPid] 2]
# Record when we finished
set endInfo(date) [clock seconds]
set endInfo(times) [times]
# Write the usage record to stderr
writeUsageRecord stderr $code
# Pass out the error code
exit $code
}
catch { main } msg
puts stderr "usageTracker error: $msg"
puts stderr $errorInfo
exit 2