Here is a TCL implementation. pi31415punycode.tcl
# This is based on the following Python implementation.
# http://pydoc.org/get.cgi/usr/local/lib/python2.4/encodings/punycode.py
#
# Also done in C, Perl, PHP, and Javascript.
# http://www.ietf.org/rfc/rfc3492.txt
# http://cpansearch.perl.org/src/MIYAGAWA/IDNA-Punycode-0.02/lib/IDNA/Punycode.pm
# http://svn.php.net/viewvc/pear/packages/Net_IDNA/trunk/Net/IDNA.php?revision=300850&view=co
# https://gist.github.com/1035853
namespace eval ::puny {
set digits "abcdefghijklmnopqrstuvwxyz0123456789"
}
proc ::puny::toChars {nums} {
set chars [list]
foreach {char} $nums {
lappend chars [format "%c" $char]
}
return [join $chars ""]
}
# Encoding
proc ::puny::enumerate {sequence {start 0}} {
set retval [list]
set n $start
foreach {elem} $sequence {
lappend retval $n $elem
incr n
}
return $retval
}
# 3.1 Basic code point segregation
proc ::puny::segregate {str} {
set base [list]
set extended [list]
foreach {c} [split $str ""] {
scan $c "%c" {char}
if {$char < 128} {
lappend base $char
} else {
lappend extended $c
}
}
set extended [lsort $extended]
return [list $base $extended]
}
# Return the length of str, considering only characters below max.
proc ::puny::selectiveLen {str max} {
set res 0
foreach c [split $str ""] {
scan $c "%c" {char}
if {$char < $max} {
incr res
}
}
return $res
}
# Return a pair (index, pos), indicating the next occurrence of
# char in str. index is the position of the character considering
# only ordinals up to and including char, and pos is the position in
# the full string. index/pos is the starting position in the full
# string.
proc ::puny::selectiveFind {str char index pos} {
set l [string length $str]
while {true} {
incr pos
if {$pos == $l} {
return [list -1 -1]
}
set c [string index $str $pos]
if {$c == $char} {
incr index
return [list $index $pos]
} elseif {$c < $char} {
incr index
}
}
}
# 3.2 Insertion unsort coding
proc ::puny::insertionUnsort {str extended} {
set oldchar 128
set result [list]
set oldindex -1
foreach {c} $extended {
set index -1
set pos -1
scan $c "%c" {char}
set curlen [::puny::selectiveLen $str $char]
set delta [expr {($curlen + 1) * ($char - $oldchar)}]
while {true} {
lassign [::puny::selectiveFind $str $c $index $pos] {index} {pos}
if {$index == -1} {
break
}
set delta [expr {$delta + $index - $oldindex}]
lappend result [expr {$delta - 1}]
set oldindex $index
set delta 0
}
set oldchar $char
}
return $result
}
# Punycode parameters: tmin = 1, tmax = 26, base = 36
proc ::puny::T {j bias} {
set res [expr {36 * ($j + 1) - $bias}]
if {$res < 1} {
return 1
}
if {$res > 26} {
return 26
}
return $res
}
# 3.3 Generalized variable-length integers
proc ::puny::generateGeneralizedInteger {N bias} {
set result [list]
set j 0
while {true} {
set t [::puny::T $j $bias]
if {$N < $t} {
lappend result [string index $::puny::digits $N]
return $result
}
set pos [expr {$t + (($N - $t) % (36 - $t))}]
lappend result [string index $::puny::digits $pos]
set N [expr {int(($N - $t) / (36 - $t))}]
incr j
}
}
proc ::puny::adapt {delta first numchars} {
if {$first} {
set delta [expr {int($delta / 700)}]
} else {
set delta [expr {int($delta / 2)}]
}
incr delta [expr {int($delta / $numchars)}]
# int((($base - $tmin) * $tmax) / 2) == 455
set divisions 0
while {$delta > 455} {
# base - tmin
set delta [expr {int($delta / 35)}]
incr divisions 36
}
set bias [expr {$divisions + int(36 * $delta / ($delta + 38))}]
return $bias
}
# 3.4 Bias adaptation
# Punycode parameters: initial bias = 72, damp = 700, skew = 38
proc ::puny::generateIntegers {baselen deltas} {
set result [list]
set bias 72
foreach {points delta} [::puny::enumerate $deltas] {
set s [::puny::generateGeneralizedInteger $delta $bias]
set result [concat $result $s]
set bias [::puny::adapt \
$delta \
[expr {$points == 0}] \
[expr {$baselen + $points + 1}] \
]
}
return $result
}
proc ::puny::encode {text} {
lassign [::puny::segregate $text] {base} {extended}
set deltas [::puny::insertionUnsort $text $extended]
set extended [::puny::generateIntegers [llength $base] $deltas]
if {[llength $base]} {
return [format "%s-%s" [::puny::toChars $base] [join $extended ""]]
}
return [join $extended ""]
}
# Decoding
proc ::puny::toNums {text} {
set retval [list]
foreach {c} [split $text ""] {
lappend retval [scan $c "%c"]
}
return $retval
}
# 3.3 Generalized variable-length integers
proc ::puny::decodeGeneralizedNumber {extended extpos bias errors} {
set result 0
set w 1
set j 0
while {true} {
set c [lindex $extended $extpos]
if {[string length $c] == 0} {
if {$errors == "strict"} {
error "incomplete punicode string"
}
incr extpos
return [list $extpos None]
}
scan $c "%c" {char}
incr extpos
if {65 <= $char && $char <= 90} {
# A-Z
set digit [expr {$char - 65}]
} elseif {48 <= $char && $char <= 57} {
# 0x30-26
set digit [expr {$char - 22}]
} elseif {$errors == "strict"} {
set pos [lindex $extended $extpos]
error [format "Invalid extended code point '%s'" $pos]
} else {
return [list $extpos None]
}
set t [::puny::T $j $bias]
set result [expr {$result + $digit * $w}]
if {$digit < $t} {
return [list $extpos $result]
}
set w [expr {$w * (36 - $t)}]
incr j
}
}
# 3.2 Insertion unsort coding
proc ::puny::insertionSort {base extended errors} {
set char 128
set pos -1
set bias 72
set extpos 0
while {$extpos < [llength $extended]} {
lassign [::puny::decodeGeneralizedNumber \
$extended \
$extpos \
$bias \
$errors \
] {newpos} {delta}
if {$delta == "None"} {
# There was an error in decoding. We can't continue because
# synchronization is lost.
return $base
}
set pos [expr {$pos + $delta + 1}]
set char [expr {$char + int($pos / ([llength $base] + 1))}]
if {$char > 1114111} {
if {$errors == "strict"} {
error [format "Invalid character U+%x" $char]
}
scan "?" "%c" {char}
}
set pos [expr {$pos % ([llength $base] + 1)}]
set base [concat \
[lrange $base 0 "$pos-1"] \
$char \
[lrange $base $pos end] \
]
set bias [::puny::adapt \
$delta \
[expr {$extpos == 0}] \
[llength $base] \
]
set extpos $newpos
}
return $base
}
proc ::puny::decode {text errors} {
set pos [string last "-" $text]
if {$pos == -1} {
set base [list]
set code [string toupper $text]
set extended [split $code ""]
} else {
set base [::puny::toNums [string range $text 0 "$pos-1"]]
set code [string toupper [string range $text "$pos+1" end]]
set extended [split $code ""]
}
return [::puny::toChars [::puny::insertionSort $base $extended $errors]]
}test.tcl
package require tcltest
source punycode.tcl
proc testme {str} {
set code [::puny::encode $str]
set text [::puny::decode $code ""]
if {$str != $text} {
error "Round-trip error"
}
return $code
}
::tcltest::test emptyinput "Empty input" \
-body {testme ""} \
-result ""
::tcltest::test singleunichar "Single unicode character" \
-body {testme "ü"} \
-result "tda"
::tcltest::test nounichar "No unicode character" \
-body {testme "Goethe"} \
-result "Goethe-"
::tcltest::test midleunichar "Unicode character in the middle" \
-body {testme "Bücher"} \
-result "Bcher-kva"
::tcltest::test indashes "Text within dashes" \
-body {testme {-> $1.00 <-}} \
-result {-> $1.00 <--}DKF: Here's a working punycode encoder, converted from the code in the RFC:
namespace eval punycode {
variable digits [split "abcdefghijklmnopqrstuvwxyz0123456789" ""]
# Bootstring parameters for Punycode
variable base 36
variable tmin 1
variable tmax 26
variable skew 38
variable damp 700
variable initial_bias 72
variable initial_n 0x80
proc adapt {delta first numchars} {
variable base
variable tmin
variable tmax
variable damp
variable skew
set delta [expr {$delta / ($first ? $damp : 2)}]
incr delta [expr {$delta / $numchars}]
set k 0
while {$delta > ($base - $tmin) * $tmax / 2} {
set delta [expr {$delta / ($base-$tmin)}]
incr k $base
}
return [expr {$k + ($base-$tmin+1) * $delta / ($delta+$skew)}]
}
# Main encode function
proc encode {input {case ""}} {
variable digits
variable tmin
variable tmax
variable base
variable initial_n
variable initial_bias
set in [split $input ""]
set output {}
# Initialize the state:
set n $initial_n
set delta 0
set bias $initial_bias
# Handle the basic code points:
foreach ch $in {
if {$ch < "\u0080"} {
if {$case ne ""} {
if {$case} {
append output [string toupper $ch]
} else {
append output [string tolower $ch]
}
} else {
append output $ch
}
}
}
set h [set b [string length $output]]
# h is the number of code points that have been handled, b is the
# number of basic code points.
if {$b} {
append output "-"
}
# Main encoding loop:
while {$h < [llength $in]} {
# All non-basic code points < n have been handled already. Find
# the next larger one:
for {set m inf; set j 0} {$j < [llength $in]} {incr j} {
scan [lindex $in $j] "%c" ch
if {$ch >= $n && $ch < $m} {
set m $ch
}
}
# Increase delta enough to advance the decoder's <n,i> state to
# <m,0>, but guard against overflow:
if {$m-$n > (0xffffffff-$delta)/($h+1)} {
error "overflow in delta computation"
}
incr delta [expr {($m-$n) * ($h+1)}]
set n $m
for {set j 0} {$j < [llength $in]} {incr j} {
scan [lindex $in $j] "%c" ch
if {$ch < $n && ([incr delta] & 0xffffffff) == 0} {
error "overflow in delta computation"
}
if {$ch == $n} {
# Represent delta as a generalized variable-length
# integer:
for {set q $delta; set k $base} true {incr k $base} {
set t [expr {min(max($k-$bias,$tmin),$tmax)}]
if {$q < $t} break
append output \
[lindex $digits [expr {$t + ($q-$t)%($base-$t)}]]
set q [expr {($q-$t) / ($base-$t)}]
}
append output [lindex $digits $q]
set bias [adapt $delta [expr {$h==$b}] [expr {$h+1}]]
set delta 0
incr h
}
}
incr delta
incr n
}
return $output
}
}

