Richard Suchenwirth 2000-04-10 - Chinlish ("Chinese from English (alphabet)") is a converter that translates a number of Chinese words written in Pinyin transcription (exception: use y for u-umlaut) to the corresponding Unicode characters. In contrast to the other members of
the Lish family, this is (and can only be) a partial solution, because the roughly 400 Pinyin syllables cannot, even in context, be mapped unambiguously onto the 4000 .. 6000 Chinese characters in common use. So if you use Chinlish, two things might happen:
- The word you wanted was not in the dictionary. The Pinyin string will come back unchanged. Remedy: if you need it more than once, add it to the dictionary (and put it on the Wiki, for the rest of us ;-). For single occurrences, use the \u notation, which always works.
- The word you wanted was not the one retrieved, e.g. you wanted a different shi than the most frequent one (the copula). Again: edit the dictionary if you feel the need, or write \u....
Simplified (PRC) and traditional (Hong Kong, Taiwan) character forms are supported. The default, and the form recommended for dictionary entries, is Simplified. If you call the main proc chinlish (or its short name cn) with the -fan switch, traditional codes are substituted where appropriate, i.e. as defined in the
i18n_jian2fan list. If you add words with jian/fan characters to the dictionary, update the jian2fan map as well. Maybe some database can be found that completely covers this mapping, but I was offline over the weekend ;-) -
RS 2007-09-04: many years later, a more comprehensive jian/fan converter is finally available on the wiki pages
fan2jian and jian2fan. }
# i18n_pin2u: global dictionary mapping a Pinyin word (exact, case-sensitive
# spelling, no tone marks) to the Chinese character string it stands for.
# Values are written as \uXXXX escapes. The table is passed through [subst]
# because Tcl performs no backslash substitution inside a braced word:
# without it every value would be stored as the literal text "\uXXXX"
# instead of the Unicode character the escape denotes.
# NOTE(review): yuanlai maps to \u5143\u6765; the usual modern spelling of
# that word is \u539f\u6765 - confirm which form is intended.
array set i18n_pin2u [subst -nocommands -novariables {
ba \u628a
Beijing \u5317\u4eac
bu \u4e0d
canguan \u53c2\u89c2
chengxu \u7a0b\u5e8f
da \u5927
daxue \u5927\u5b66
de \u7684
erqie \u800c\u4e14
feichang \u975e\u5e38
ge \u4e2a
gongzuo \u5de5\u4f5c
hao \u597d
he \u548c
hen \u5f88
huanying \u6b22\u8fce
huida \u56de\u7b54
jintian \u4eca\u5929
jisuanji \u8ba1\u7b97\u673a
kexue \u79d1\u5b66
lao \u8001
laoshi \u8001\u5e08
le \u4e86
Nanjing \u5357\u4eac
neng \u80fd
nenggou \u80fd\u591f
ni \u4f60
nimen \u4f60\u4eec
pengyou \u670b\u53cb
relie \u70ed\u70c8
ren \u4eba
Shanghai \u4e0a\u6d77
shi \u662f
suoyi \u6240\u4ee5
ta \u4ed6
tamen \u4ed6\u4eec
Tianjin \u5929\u6d25
wenti \u95ee\u9898
wo \u6211
women \u6211\u4eec
Xianggang \u9999\u6e2f
xiao \u5c0f
xiaoxue \u5c0f\u5b66
xuesheng \u5b66\u751f
yanjiu \u7814\u7a76
yi \u4e00
yinwei \u56e0\u4e3a
you \u6709
yuanlai \u5143\u6765
zai \u5728
zhe \u8fd9
zheyang \u8fd9\u6837
zhongguo \u4e2d\u56fd
zhongwen \u4e2d\u6587
zhongxue \u4e2d\u5b66
zhuanhuan \u8f6c\u6362
}] ;#--------------------- above: the dictionary - extend as required
# chinlish ?-fan? ?word ...?
#   Translates Pinyin words into the Chinese strings registered in the global
#   array ::i18n_pin2u and returns them concatenated without separators.
#   Words not found in the dictionary are passed through unchanged.
#
#   Arguments may be given as separate words or as one (or several) quoted
#   phrases; all arguments are flattened into whitespace-separated words.
#   With no arguments a built-in demo phrase is translated.
#
#   -fan   when present as a word of its own, the result is run through
#          jian2fan (simplified -> traditional). Only the first occurrence
#          is consumed as the switch.
#
#   A single "." at the very end of the input becomes the ideographic full
#   stop \u3002; other runs of . , : ; ! ? are split off the preceding word
#   (so the word can still be looked up) and copied to the output as they are.
proc chinlish {args} {
    if {[llength $args] == 0} {
        set args [list "huanying, zhe shi zhongwen zhuanhuan chengxu"]
    }
    # Tokenise on whitespace instead of treating free text as a Tcl list, so
    # a quoted phrase passed as one argument is split into its words.
    set words [regexp -all -inline {\S+} [join $args " "]]

    # The switch must be a complete word; a plain regsub on the text would
    # also fire on "-fan" embedded in a longer word.
    set fan 0
    set pos [lsearch -exact $words -fan]
    if {$pos >= 0} {
        set fan 1
        set words [lreplace $words $pos $pos]
    }

    set text [join $words " "]
    regsub {[.]$} $text " \u3002" text
    regsub -all {([.,:;!?]+)} $text { \1} text

    set res ""
    foreach word [regexp -all -inline {\S+} $text] {
        if {[info exists ::i18n_pin2u($word)]} {
            append res $::i18n_pin2u($word)
        } else {
            append res $word
        }
    }
    if {$fan} {set res [jian2fan $res]}
    return $res
}
# cn ?-fan? ?word ...?
#   Short alias for chinlish. All arguments are flattened into
#   whitespace-separated words (so a quoted phrase may be passed as a single
#   argument) and handed to chinlish; its result is returned.
proc cn args {
    set words [regexp -all -inline {\S+} [join $args " "]]
    # Evaluate a properly built list: eval on the raw concatenated text would
    # execute any [command], $variable or ";" contained in the user's input.
    eval [linsert $words 0 chinlish]
}
# cn:dic pattern
#   Lists dictionary entries whose Pinyin key matches the glob pattern.
#   Returns a flat list "key value key value ..." taken from the global
#   array ::i18n_pin2u, ordered by key as sorted by lsort; the list is
#   empty when nothing matches.
proc cn:dic s {
    set pairs {}
    set keys [array names ::i18n_pin2u $s]
    foreach key [lsort $keys] {
        lappend pairs $key [set ::i18n_pin2u($key)]
    }
    return $pairs
}
# i18n_jian2fan: flat list of character pairs "simplified traditional ...",
# read by jian2fan and fan2jian. The table is passed through [subst] because
# Tcl performs no backslash substitution inside a braced word: without it
# each element would be the literal text "\uXXXX" rather than the character.
set i18n_jian2fan [subst -nocommands -novariables {
\u4e2a \u500b
\u4e3a \u7232
\u4eec \u5011
\u53c2 \u53c3
\u56fd \u570b
\u5b66 \u5b78
\u5e08 \u5e2b
\u673a \u6a5f
\u6765 \u4f86
\u6b22 \u6b61
\u70ed \u71b1
\u89c2 \u89c0
\u8ba1 \u8a08
\u8f6c \u8f49
\u8fd9 \u9019
\u95ee \u554f
\u9898 \u984c
}]
#--------- simplified(jian) - traditional(fan) mapping (incomplete) - see the
# wiki pages fan2jian and jian2fan for better data
# jian2fan s
#   Returns s with every simplified character listed in the global pair list
#   ::i18n_jian2fan replaced by its traditional counterpart; all other
#   characters are left unchanged.
proc jian2fan s {
    # Table entries may be stored as "\uXXXX" escape text (a braced literal
    # is not backslash-substituted); decode them so that both the search and
    # the replacement strings are real characters. Already-decoded entries
    # pass through subst unchanged.
    set map [subst -nocommands -novariables $::i18n_jian2fan]
    # string map treats the entries as plain text and replaces in one pass;
    # regsub would interpret them as pattern / substitution specs.
    string map $map $s
}
# fan2jian s
#   Returns s with every traditional character listed in the global pair list
#   ::i18n_jian2fan replaced by its simplified counterpart; all other
#   characters are left unchanged.
proc fan2jian s {
    # Build the reverse (traditional -> simplified) map. Table entries may be
    # stored as "\uXXXX" escape text (a braced literal is not
    # backslash-substituted), so decode them first; already-decoded entries
    # pass through subst unchanged.
    set map {}
    foreach {jian fan} [subst -nocommands -novariables $::i18n_jian2fan] {
        lappend map $fan $jian
    }
    # string map treats the entries as plain text and replaces in one pass;
    # regsub would interpret them as pattern / substitution specs.
    string map $map $s
}