Richard Suchenwirth 2007-02-05 - The cross-tabulation code at
crosstab works like it should, but I very soon didn't like it - the
main proc is too big and has
puts sprinkled over the place. So I rewrote it in more
functional programming terms by adding some clean functions operating on matrixes (implemented as lists of row lists), lists, resp. strings. The code hasn't become shorter from that, but better organized, and the functions could be re-used in other situations
set usage {
usage: crosstab.tcl infile ?-c x,y? ?-sep '\t'? -w 12
Reads the infile (or stdin if infile is "-") for cases
Prints on stdout a cross-tabulation for the specified columns.
-c x,y : columns to use (1...), default is 1,2
-sep x : column separator, default is ";"
-w n : output column width, default is 12
}
if {[llength $argv] == 0} {puts stderr $usage; exit 1}
proc main argv {
getopt argv -sep sep ";"
set sep [subst $sep] ;#-- e.g. for \t
getopt argv -c cols 1,2
getopt argv -w w 12
set infile [lindex $argv 0]
if {$infile eq "-"} {
set f stdin
} elseif {[file exists $infile]} {
set f [open $infile]
} else {puts stderr "no such file $infile\n$::usage"; exit 1}
foreach {x y z} [split $cols ,] break
incr x -1; incr y -1
if {$z ne ""} {incr z -1}
while {[gets $f line] >= 0} {
set fields [split $line $sep]
set key [lindex $fields $x],[lindex $fields $y],
if {$z ne ""} {append key [lindex $fields $z]}
inc N($key)
}
if {$f ne "stdout"} {close $f}
set xs [get_values N 0]
set ys [get_values N 1]
if {$z ne ""} {
set zs [get_values N 2]
} else {set zs {{}}}
set totals [lrepeat [llength $ys] [lrepeat [llength $xs] 0]]
set ylabels [map [list format %-${w}s] [concat $ys Total]]
foreach zv $zs {
array unset a
foreach i [array names N *,$zv] {
foreach {p1 p2} [split $i ,] break
set a($p1,$p2) $N($i)
}
set m [marray a $xs $ys]
set totals [mexpr $totals + $m]
set m [mlabel [msums $m] [concat $xs Total] $ylabels]
puts $zv\n[mformat $m $w 1]\n
}
if {$zs ne "{}"} {
set m [mlabel [msums $totals] [concat $xs Total] $ylabels]
puts "Grand Total:\n[mformat $m $w 1]\n"
}
}
#------------------------ additional list functions
proc lrepeat {n args} { #-- built-in from 8.5
set res {}
for {set i 0} {$i<$n} {incr i} {eval lappend res $args}
set res
}
#-- [Map] a function to a list, returning the results
proc map {script list} {
set res {}
foreach i $list {lappend res [eval [linsert $script end $i]]}
set res
}
#-- Sum of a list
proc lsum list {
set res 0
foreach i $list {set res [expr {$res+$i}]}
set res
}
#-- Apply a binary operator element-wise to two matrixes, giving a third
proc mexpr {mat1 op mat2} {
set res {}
foreach row1 $mat1 row2 $mat2 row "" {
foreach col1 $row1 col2 $row2 {
lappend row [expr {$col1} $op {$col2}]
}
lappend res $row
}
set res
}
#-- Create a matrix from an array with (x,y) keys
proc marray {_arr cols rows} {
upvar 1 $_arr arr
set res {}
foreach row $rows {
set outrow {}
foreach col $cols {lappend outrow [get arr($col,$row)]}
lappend res $outrow
}
set res
}
#-- Compute row and columns sums, and add them to a matrix (at right resp. bottom
proc msums matrix {
set ncol -1
set ncols {}
foreach i [lindex $matrix 0] {
set [incr ncol] 0
lappend ncols $ncol
}
set res {}
foreach row $matrix {
foreach cell $row ncol $ncols {
set $ncol [expr {[set $ncol]+$cell}]
}
lappend res [lappend row [lsum $row]]
}
set colsums {}
foreach i [lindex $matrix 0] ncol $ncols {
lappend colsums [set $ncol]
}
lappend res [lappend colsums [lsum $colsums]]
}
#-- turn a matrix into a formatted multiline string
proc mformat {matrix {w 12} {underline 0}} {
set res ""
foreach row $matrix line "" {
foreach cell $row {append line [format %${w}s $cell]}
lappend res $line
}
if $underline {
set length [string length [lindex $res 0]]
set res [linsert $res 1 [string repeat - $length]]
}
join $res \n
}
#-- Add column and row labels to a matrix
proc mlabel {matrix collabels {rowlabels {}}} {
#-- Add column and row labels to a matrix
set res [list [linsert $collabels 0 {}]]
foreach row $matrix label $rowlabels {
lappend res [linsert $row 0 $label]
}
set res
}
#-- Retrieve a variable's value, if present, else 0
proc get _var {
upvar 1 $_var var
if {[info exists var]} {set var} else {return 0}
}
#-- Get the values of (x,y,...) array keys by position
proc get_values {_arr pos} {
upvar 1 $_arr arr
set values {}
foreach i [array names arr] {
lappend values [lindex [split $i ,] $pos]
}
lsort -unique $values
}
#-- See [getopt] for discussion
proc getopt {_argv name {_var ""} {default ""}} {
upvar 1 $_argv argv $_var var
set pos [lsearch -regexp $argv ^$name]
if {$pos>=0} {
set to $pos
if {$_var ne ""} {
set var [lindex $argv [incr to]]
}
set argv [lreplace $argv $pos $to]
return 1
} else {
if {[llength [info level 0]] == 5} {set var $default}
return 0
}
}
#-- auto-initializing increment (standard in 8.5)
proc inc {_var {amount 1}} {
upvar 1 $_var var
if ![info exists var] {set var 0}
incr var $amount
}
main $argv