proc tokenize { fil dict verb } { while { [gets stdin line ] >= 0 } { foreach word $line { seek $dict 0 start set found 0 while ![eof $dict ] { gets $dict buff set token [lindex $buff 0] if ![ string compare $word $token ] { puts $verb "-=-=- Found word '$word' -=-=-" foreach entry [lrange $buff 1 end ] { set token [lindex $entry 0] puts $verb "Part of speech: $token" puts -nonewline $fil " $token " set token [lindex $entry 1] puts $verb "Root: $token" puts -nonewline $fil "$token " foreach token [lrange $entry 2 end ] { puts -nonewline $fil "$token " } puts -nonewline $fil "|" } puts $fil "" set found 1 break } } if !$found { puts $verb "Error -- word '$word' not found in dictionary!" return -1 } } } } set verb stderr if [expr $argc<1 || $argc>2] { puts $verb "Format: tokenize <dictionary_file> {to_file}\nNote: input is from stdin\n" exit 1 } set verb stdout if [catch {open [lindex $argv 0 ] r } dict ] { puts $verb [format "Error -- could not open dictionary '%s' for reading!\nerror %s\n" [lindex $argv 0 ] $dict] exit 2 } if $argc==1 { set fil stdout set verb stderr } else { if [ catch { open [lindex $argv 1] w} fil ] { puts $verb [format "Error -- could not open '%s' for writing!\nerror %s\n" [lindex $argv 1 ] $fil ] exit 2 } } tokenize $fil $dict $verb close $fil close $dict
The program will need a dictionary to find the translation from a language to the generic interface, and right now works well only with an easily tokenizable language as english (latin language have a lot of endings with infinite exceptions).For easy reference, I add here some a entries:
a { D a 3 s } abuse { N abuse 3 s } { V abuse 1 s } { V abuse 1 p } { V abuse 2 s } { V abuse 2 p } { V abuse 3 p } { V abuse inf } abuses { N abuse 3 p } { V abuse 3 s } accept { V accept 1 s } { V accept 1 p } { V accept 2 s } { V accept 2 p } { V accept 3 p } { V accept inf } accepts { V accept 3 s } am { X be 1 s modal } and { P and mul } { W and mul }The format of definition is:
- Type of word: D for Det, N for noun, V for verb and so on
- Root: the logical meaning in the intermediate language, this will be needed later
- a list of attributes as needed, like mode and number in verbs
set yysccsid "@(#)yaccpar 1.8 (Berkeley) 01/20/91 (Tcl 2.0 12/31/92)"; set YYBYACC 1 set N 257 set ADJ 258 set DET 259 set V 260 set PRON 261 set AUX 262 set ADV 263 set PCONJ 264 set WCONJ 265 set YYERRCODE 256 set yylhs { -1 0 7 7 8 8 1 1 2 2 2 4 4 3 3 6 5 5 } set yylen { 2 1 3 1 3 2 3 1 3 2 1 2 0 2 3 1 2 0 } set yydefred { 0 0 0 10 0 0 0 0 1 0 11 0 15 0 0 0 0 0 9 0 8 16 4 13 0 6 2 14 } set yydgoto { 4 5 6 14 7 15 16 8 9 } set yysindex { -255 -258 -258 0 0 -251 -256 -242 0 -248 0 -240 0 -245 -255 -241 -245 -255 0 -255 0 0 0 0 -239 0 0 0 } set yyrindex { -237 -237 -237 0 0 -238 1 0 0 23 0 0 0 -238 2 0 -238 -237 0 -237 0 0 0 0 0 0 0 0 } set yygindex { 0 -9 0 0 12 -6 0 5 0 } set YYTABLESIZE 266 set yytable { 1 7 5 1 2 22 3 21 25 17 24 12 13 10 11 18 19 20 13 23 12 27 17 3 26 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 0 7 0 7 7 7 5 } set yycheck { 258 0 0 258 259 14 261 13 17 265 16 262 263 1 2 257 264 257 263 260 257 260 260 0 19 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 257 -1 260 -1 262 263 264 264 } set YYFINAL 4 #ifndef YYDEBUG #define YYDEBUG 0 #endif set YYMAXTOKEN 265 #if YYDEBUG set yyname { end-of-file TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull TclNull N ADJ DET V PRON AUX ADV PCONJ WCONJ } set yyrule { {$accept : p1} { p} { s PCONJ p} { s} { np vp np} { np vp} { ns WCONJ np} { ns} { DET adjp N} { adjp N} { PRON} { ADJ adjp} {} { post_modal_advs V} { modal_aux post_modal_advs V} { AUX} { ADV post_modal_advs} {} {} } #endif proc yyclearin {} { global yychar ; set yychar -1 } proc yyerrok {} { global yyerrflag ; set yyerrflag 0 } set YYSTACKSIZE 500 set YYMAXDEPTH 500 set yyss($YYSTACKSIZE) 0 set yyvs($YYSTACKSIZE) 0 set yyptvs($YYSTACKSIZE) 0 proc yyerror {errormessage} { global yynerrs incr yynerrs puts stderr $errormessage yy_err_recover } # end of yyerror proc yy_err_recover {} { global yydefred yysindex yycheck yyrindex yyerrflag yysindex yyss yyssp global YYERRCODE yychar yytable yyvsp yyn yystate yydebug yylval yyvs yyptvs if {$yyerrflag < 3} { set yyerrflag 3 while 1 { if {[set yyn [lindex $yysindex $yyss($yyssp)]] && [incr yyn $YYERRCODE]>=0 && [lindex $yycheck $yyn]==$YYERRCODE } then { if ($yydebug) { puts stderr "yydebug: state $yyss($yyssp), error recovery shifting" puts stderr " to state $yytable($yyn)\n" } set yyss([incr yyssp]) [set yystate [lindex $yytable $yyn]] set yyvs([incr yyvsp]) \"$yylval\" set yyptvs($yyvsp) [list [concat LEAF \"$yylval\"]] # perl: next yyloop return 0 } else { if ($yydebug) { puts stderr "yydebug: error recovery discarding state $yyss($yyssp), " } if {$yyssp <= 0} {return 1} incr yyssp -1 incr yyvsp -1 } } } else { if {$yychar == 0} {return 1} if ($yydebug) { set yys TclNull if {$yychar <= $YYMAXTOKEN} {set yys [lindex $yyname $yychar]} if {$yys == "TclNull"} {set $yys illegal-symbol} puts stderr "yydebug: state $yystate, error recovery discards token $yychar($yys)" } set yychar -1 # perl: next yyloop return 0 } return 0 } # end of yy_err_recover proc yyparse {} { global yydefred yysindex yycheck yyrindex yyerrflag yysindex yyssp yyss yynerrs global yychar yytable yylval yyvsp yystate yylen yylhs yygindex yydgoto YYFINAL yyn yyval global yyptval yyrule yydebug yyname yyvs yyptvs global yyParseTree yyParseTreeList yyStateTable yyParseLoopCount set yynerrs 0 set yyerrflag 0 set yychar -1 set yyssp 0 set yyvsp 0 set yyss($yyssp) [set yystate 0] while 1 { set yyn [lindex $yydefred $yystate] if {!$yyn} { if {$yychar < 0} { set yychar [yylex] if {$yychar<0} {set yychar 0} lappend yyStateTable [list $yystate read [lindex $yyname $yychar] $yylval] if $yydebug { set yys TclNull if {$yychar <= [llength $yyname]} {set yys [lindex $yyname $yychar]} if {$yys == "TclNull"} {set yys illegal-symbol} puts stderr "yydebug: state $yystate, reading $yychar ($yys : \"$yylval\")" } } if { [set yyn [lindex $yysindex $yystate]] && [expr [incr yyn $yychar]>=0] && [lindex $yycheck $yyn]==$yychar } { lappend yyStateTable [list $yystate shift [lindex $yytable $yyn]] if ($yydebug) { puts stderr "yydebug: state $yystate, shifting to state [lindex $yytable $yyn]" } set yyss([incr yyssp]) [set yystate [lindex $yytable $yyn]] set yyvs([incr yyvsp]) \"$yylval\" set yyptvs($yyvsp) [list [concat LEAF \"$yylval\"]] set yychar -1 if {$yyerrflag > 0} [incr yyerrflag -1] continue } if {!([set yyn [lindex $yyrindex $yystate]] && [expr [incr yyn $yychar]>=0] && [lindex $yycheck $yyn]==$yychar) } then { if {!$yyerrflag} { yyerror "syntax-error" incr yynerrs } if [yy_err_recover] then {return 1} } else { set yyn [lindex $yytable $yyn] } } # put debug statement inside branch lappend yyStateTable [list $yystate reduce [lindex $yylen $yyn]] if ($yydebug) { puts stderr "yydebug: state $yystate, reducing by rule $yyn" } set yym [lindex $yylen $yyn] set yyval [lindex $yyvsp [expr 1-$yym]] # this can go? set yyval "" set yyptval "" set rhslen [lindex $yylen $yyn] for {set mi 0} {$mi < $rhslen } {incr mi} { lappend yyptval [concat [lindex [lindex $yyrule $yyn] $mi] $yyptvs([expr $yyvsp-$rhslen+$mi+1])] } set yyParseTreeList([incr yyParseLoopCount]) $yyptval case $yyn in { 1 { # line 27 "engl.y" global yyretvalue set yyretvalue $yyvs([expr $yyvsp-0]) } 2 { # line 34 "engl.y" set yyval [list MUL $yyvs([expr $yyvsp-2]) $yyvs([expr $yyvsp-1]) $yyvs([expr $yyvsp-0]) ] } 3 { # line 38 "engl.y" set yyval $yyvs([expr $yyvsp-0]) } 4 { # line 44 "engl.y" if [string compare [get_num $yyvs([expr $yyvsp-2])] [get_num $yyvs([expr $yyvsp-1])] ] { yyerror "Subject number does not match verb number in sentence." return 1 } if [expr [get_pers $yyvs([expr $yyvsp-2])] != [get_pers $yyvs([expr $yyvsp-1])] ] { yyerror "Subject person does not match verb person in sentence." return 1 } if [ expr [is_pronoun $yyvs([expr $yyvsp-2])] && [check_mark $yyvs([expr $yyvsp-2]) "pronobj"] ] { yyerror "Subject pronoun is objective case." return 1 } if [ expr [is_pronoun $yyvs([expr $yyvsp-0])] && ![check_mark $yyvs([expr $yyvsp-0]) "pronobj"] ] { yyerror "Object pronoun is not objective case." return 1 } set yyval [list 0 $yyvs([expr $yyvsp-1]) $yyvs([expr $yyvsp-2]) $yyvs([expr $yyvsp-0])] } 5 { # line 64 "engl.y" if [string compare [get_num $yyvs([expr $yyvsp-1])] [get_num $yyvs([expr $yyvsp-0])] ] { yyerror "Subject number does not match verb number in sentence." return 1 } if [expr [get_pers $yyvs([expr $yyvsp-1])] != [get_pers $yyvs([expr $yyvsp-0])] ] { yyerror "Subject person does not match verb person in sentence." return 1 } if [ expr [is_pronoun $yyvs([expr $yyvsp-1])] && [check_mark $yyvs([expr $yyvsp-1]) "pronobj"] ] { yyerror "Subject pronoun is objective case." return 1 } set yyval [list 0 $yyvs([expr $yyvsp-0]) $yyvs([expr $yyvsp-1]) 0] } 6 { # line 82 "engl.y" set num p set pers [min [get_pers $yyvs([expr $yyvsp-2]) ] [get_pers $yyvs([expr $yyvsp-0])]] set marks [concat [list $pers $num] [lrange $yyvs([expr $yyvsp-2]) 4 end] [lrange $yyvs([expr $yyvsp-0]) 4 end]] set yyval [concat [list MUL [list $yyvs([expr $yyvsp-2]) $yyvs([expr $yyvsp-0]) $yyvs([expr $yyvsp-1]) ]] $marks] } 7 { # line 89 "engl.y" set yyval $yyvs([expr $yyvsp-0]) } 8 { # line 95 "engl.y" if [string compare [get_num $yyvs([expr $yyvsp-2])] [get_num $yyvs([expr $yyvsp-0])] ] { yyerror "Determiner and noun number do not match in noun phrase" return 1 } if [expr [get_pers $yyvs([expr $yyvsp-2])] != [get_pers $yyvs([expr $yyvsp-0])] ] { yyerror "Determiner and noun person do not match in noun phrase" return 1 } set marks [lrange [split [string trim $yyvs([expr $yyvsp-0]) "\""]] 2 end] set yyval [concat [list 0 [list $yyvs([expr $yyvsp-2]) $yyvs([expr $yyvsp-1]) $yyvs([expr $yyvsp-0])]] $marks ] } 9 { # line 109 "engl.y" set marks [lrange [split [string trim $yyvs([expr $yyvsp-0]) "\""]] 2 end] set yyval [concat [list 0 [list 0 $yyvs([expr $yyvsp-1]) $yyvs([expr $yyvsp-0])]] $marks ] } 10 { # line 114 "engl.y" set marks [lrange [split [string trim $yyvs([expr $yyvsp-0]) "\""]] 2 end] set yyval [concat [list PRON $yyvs([expr $yyvsp-0])] $marks ] } 11 { # line 121 "engl.y" set yyval [concat $yyvs([expr $yyvsp-0]) $yyvs([expr $yyvsp-1])] } 12 { # line 125 "engl.y" } 13 { # line 130 "engl.y" set marks [lrange [split [string trim $yyvs([expr $yyvsp-0]) "\""]] 2 end] set yyval [concat [list 0 [list 0 $yyvs([expr $yyvsp-1]) $yyvs([expr $yyvsp-0])]] $marks ] } 14 { # line 135 "engl.y" if ![check_mark $yyvs([expr $yyvsp-0]) "inf" ] { yyerror "Verb used with aux is not infinitive." return 1 } set marks [lrange [split [string trim $yyvs([expr $yyvsp-2]) "\""]] 2 end] set yyval [concat [list 0 [list $yyvs([expr $yyvsp-2]) $yyvs([expr $yyvsp-1]) $yyvs([expr $yyvsp-0])]] $marks ] } 15 { # line 146 "engl.y" if ![check_mark $yyvs([expr $yyvsp-0]) "modal" ] { yyerror "Auxilliary is not modal!" return 1 } set yyval $yyvs([expr $yyvsp-0]) } 16 { # line 156 "engl.y" set yyval $yyvs([expr $yyvsp-0]) set yyval [lappend $yyval $yyvs([expr $yyvsp-1])] } 17 { # line 161 "engl.y" } # line 411 "y.tab.tcl" } # end of actions case statement incr yyssp -$yym set yystate $yyss($yyssp) incr yyvsp -$yym set yym [lindex $yylhs $yyn] if {$yystate == 0 && $yym == 0} then { lappend yyStateTable [list $yystate rshift $YYFINAL] if {$yydebug} { puts stderr "yydebug: after reduction, shifting from state 0 to state $YYFINAL" } set yystate $YYFINAL set yyss([incr yyssp]) $YYFINAL set yyvs([incr yyvsp]) $yyval set yyptvs($yyvsp) $yyptval if {$yychar < 0} { set yychar [yylex] if {$yychar<0} { set yychar 0 } lappend yyStateTable [list $yystate readb [lindex $yyname $yychar] $yylval] if ($yydebug) { set yys TclNull if {$yychar <= [llength $yyname]} { set yys [lindex $yyname $yychar] } if {$yys == "TclNull"} { set yys illegal-symbol } puts stderr "yydebug: state $YYFINAL, reading $yychar ($yys)" } } if {$yychar == 0} {return 0} # yyloop continue } if {[set yyn [lindex $yygindex $yym]] && [expr [incr yyn $yystate]]>=0 && [expr $yyn<=[llength $yycheck]] && [expr [lindex $yycheck $yyn]==$yystate]} then { set yystate [lindex $yytable $yyn] } else { set yystate [lindex $yydgoto $yym] } lappend yyStateTable [list $yyss($yyssp) rshift $yystate] if {$yydebug} { puts stderr "yydebug: after reduction, shifting from state $yyss($yyssp) to state $yystate" } set yyss([incr yyssp]) $yystate set yyvs([incr yyvsp]) $yyval set yyptvs($yyvsp) $yyptval } } #end yyparse # line 165 "engl.y" proc check_mark { a b } { set a [string trim $a " \""] return [expr [lsearch -exact [lrange $a 2 end] $b] >= 0] } proc get_num { a } { set a [string trim $a " \""] return [lindex $a 3] } proc get_pers { a } { set a [string trim $a " \""] return [lindex $a 2] } proc is_pronoun { a } { set a [lindex [string trim $a " \""] 0 ] if [string compare $a PRON] { return 0 } else { return 1 } } proc min { a b } { return [expr $a<$b?$a:$b] } # line 487 "y.tab.tcl" proc is_mul { a } { return ![string compare MUL [lindex $a 0]] } proc find_etok { file root type } { seek $file 0 while { [gets $file line ] >= 0 } { set line [split $line] if ![string compare $root [lindex $line 0]] { if ![string compare $type [lindex $line 1]] { return [lindex $line 2] } } } return "" } proc conj { conj } { set conj [string trim $conj " \""] switch [lindex $conj 1] { and { return "'ej" } but { return "'ach" } or { return "qoj" } } return -code 1 "" } proc con { conj } { set conj [string trim $conj " \""] switch [lindex $conj 1] { and { return "je'" } or { return "joq" } } return -code 1 "" } proc n_1 { noun } { if [check_mark $noun "large"] { return "'a'" } if [check_mark $noun "small"] { return "Hom" } if [check_mark $noun "dear"] { return "oy" } return "" } proc n_3 { adjs } { foreach adj $adjs { if [check_mark $adj "so-called"] {return "qoq"} if [check_mark $adj "apparent"] {return "Hey"} if [check_mark $adj "definite"] {return "na'"} } return "" } proc n_4 { adjs } { foreach adj $adjs { if [check_mark $adj "my" ] { return "wIj" } if [check_mark $adj "your" ] { return "lIj" } if [check_mark $adj "his" ] { return "Daj" } if [check_mark $adj "her" ] { return "Daj" } if [check_mark $adj "its" ] { return "Daj" } if [check_mark $adj "our" ] { return "maj" } if [check_mark $adj "yourpl" ] { return "raj" } if [check_mark $adj "their" ] { return "chaj" } if [check_mark $adj "this" ] { return "vam" } if [check_mark $adj "that" ] { return "vetlh" } } return "" } proc plur { noun } { if [check_mark $noun "lang"] { return "pu'" } if [check_mark $noun "bodypart"] { return "Du'" } return "mey" } proc adjs { adjlist etok } { set retv "" foreach adj $adjlist { set adj [lindex $adj 1] if [llength [set k_word [find_etok $etok $adj "A" ]]] { append retv " $k_word" } } return $retv } proc tran_noun { noun file expl } { global prons if [is_mul $noun] { set first [lindex [lindex $noun 1] 0] set second [lindex [lindex $noun 1] 1] set conj [lindex [lindex $noun 1] 2] if [catch {tran_noun $first $file 1} k_1 ] { return -code 1 "" } if [catch {tran_noun $second $file 1} k_2 ] { return -code 1 "" } if [catch {con $conj } c ] { return -code 1 "" } return "$k_1 $k_2 $c " } if ![is_pronoun $noun] { set wd [lindex [lindex $noun 1] end] set wd [lindex [split $wd] 1] if ![llength [set k_word [find_etok $file $wd "N" ]]] { puts stderr "\nError -- Noun '$wd' not in English to Klingon dictionary" return -code 1 "" } append k_word [n_1 $noun] if ![string compare [get_num $noun ] p ] { append k_word [plur $noun] } set adjlist [lindex [lindex $noun 1] 1] append k_word [n_3 $adjlist] append k_word [n_4 $adjlist] append k_word [adjs $adjlist $file] return "$k_word " } else { if $expl { set sn [expr [string compare [get_num $noun ] p ]?0:4] set pers [get_pers $noun] if [expr ($pers==3) && [check_mark $noun "lang"] ] { incr pers } return [lindex $prons [expr $sn+$pers-1]] } return "" } } proc v_5 { verb } { set modal [lindex $verb 0] if [string compare $modal "0" ] { if [check_mark $modal "can"] { return "laH" } } return "" } proc v_not { verb } { foreach adv [lindex $verb 1] { if [check_mark $adv "not"] { return "be'" } } } proc tran_verb { verb subj obj etok } { global vpref set wd [lindex [lindex $verb 1] end] set wd [lindex [split $wd] 1] if ![llength [set k_word [find_etok $etok $wd "V"]]] { puts stderr "\nError -- Verb '$wd' not in English to Klingon dictionary" close $etok return -code 1 "" } if ![string compare $obj "0"] { set sn [expr [string compare [get_num $subj] p]?3:0] set pref [lindex $vpref [expr $sn+[get_pers $subj]-1]] set pref [lindex $pref 0] } else { set sn [expr [string compare [get_num $subj] p]?3:0] set pref [lindex $vpref [expr $sn+[get_pers $subj]-1]] set on [expr [string compare [get_num $obj] p]?3:0] set pref [lindex $pref [expr $sn+[get_pers $obj]-1]] } append k_word [v_5 [lindex $verb 1]] append k_word [v_not [lindex $verb 1]] return $pref$k_word } proc to_klingon { sent } { global argv if [is_mul $sent] { set first [lindex $sent 1] set second [lindex $sent 3] set conj [lindex $sent 2] set result [to_klingon $first] append result [conj $conj] " " [to_klingon $second] return $result } set file [lindex $argv 0] if [catch { open $file r } etok ] { puts stderr "Couldn't open English to Klingon dictionary '$file'" return -code 2 "" } puts stderr "Translating a valid sentence..." set obj [lindex $sent 3] set subj [lindex $sent 2] set verb [lindex $sent 1] if [string compare $obj "0" ] { if [catch {tran_noun $obj $etok 0} k_word] { close $etok return -code 1 "" } } set phrase $k_word set k_word [tran_verb $verb $subj $obj $etok] append phrase "$k_word " if [string compare $subj "0" ] { if [catch {tran_noun $subj $etok 0} k_word] { close $etok return -code 1 "" } } append phrase $k_word return $phrase } proc yylex {} { global lexindex trylist yylval yyname yydebug global N ADJ DET V PRON AUX ADV PCONJ WCONJ set yylval [string trim [lindex $trylist $lexindex] " \""] set j [lindex [lindex $trylist $lexindex] 0] switch $j { N {set rval $N} A {set rval $ADJ} D {set rval $DET} V {set rval $V} R {set rval $PRON} X {set rval $AUX} B {set rval $ADV} P {set rval $PCONJ} W {set rval $WCONJ} default {set rval ""} } incr lexindex if $yydebug { puts stderr "LEX:\t lexeme value is $yylval, lexeme type code is $rval" } return $rval } proc print_word { r } { set i [lindex $r 2] set j [lindex $r 3] switch [lindex $r 0] { N { puts -nonewline stderr "NOUN{$i$j} "} R { puts -nonewline stderr "PRON{$i$j} "} X { puts -nonewline stderr "AUX{$i$j} "} V { puts -nonewline stderr "VERB{$i$j} "} B { puts -nonewline stderr "ADV "} P { puts -nonewline stderr "PCONJ "} W { puts -nonewline stderr "WCONJ "} D { puts -nonewline stderr "DET{$i$j} "} A { puts -nonewline stderr "ADJ "} default { puts stderr "? " } } } proc load_words {fil} { global replist word_ptr while { [gets $fil buff] >= 0 } { set repword {} set j 0 set entry [split $buff |] lappend replist [lreplace $entry end end] } set i 0 foreach word $replist { set j [llength $word] puts stderr [format "Done with word $i ($j alternative%s)" \ [expr ($j==1) ?"":"s"]] incr i } puts stderr [format "%s words loaded" [llength $replist]] for {set j 0} {$j<$i} {incr j} {set word_ptr($j) 0} } # Main if [string match -* [lindex $argv 0]] { if [string match -debug [lindex $argv 0]] { set deberr -1 set argv [lrange $argv 1 end] incr argc -1 } else { puts stderr "Format: parse ?-debug? <etok_dict_file> <file>" puts stderr " or: parse ?-debug? <etok_dict_file> < <file>" exit 1 } } else { set deberr 0 } if [expr $argc>2 || $argc<1] { puts stderr "Format: parse ?-debug? <etok_dict_file> <file>" puts stderr " or: parse ?-debug? <etok_dict_file> < <file>" exit 1 } set etok_dict [lindex $argv 0] if {$argc==2} { if [catch {open [lindex $argv 1] r} fil] { puts stderr [format "Error -- couldn't open file '%s' errno $fil" [lindex $argv 1]] exit 2 } load_words $fil close $fil } else { load_words stdin } set total [set Qapla 0] set l [llength $replist] set k 0 set yydebug 0 set yyParseLoopCount 0 set YYERROR 1 set prons { jIH SoH 'oH maH tlhIH bIH chaH } set vpref {{ "jI" "" "qa" "vI" "" "Sa" "vI" }\ { "bI" "cho" "" "Da" "ju" "" "Da" }\ { "" "mu" "Du" "" "nu" "lI" "" }\ { "ma" "" "pI" "wI" "" "re" "DI" }\ { "Su" "tu" "" "bo" "che" "" "bo" }\ { "" "mu" "nI" "lu" "nu" "lI" "" }} while {$k!=$l} { set posn 0 set first 0 if $deberr { puts stderr "-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-==-=-=-=-" puts -nonewline stderr "TRYING SEQUENCE " } set lexindex 0 set trylist "" set i 0 foreach word $replist { set w [lindex $word $word_ptr($i)] lappend trylist $w if $deberr { print_word [lindex $word $word_ptr($i)] } incr i } if $deberr { puts stderr "" } if [yyparse] { if $deberr { puts stderr "THIS SEQUENCE DID NOT PARSE." } } else { puts stderr "THIS SEQUENCE PARSED SUCCESSFULLY!" incr Qapla puts stdout [to_klingon $yyretvalue] } incr total set k 0 foreach wl $replist { incr word_ptr($k) set j [llength $wl] if [llength $wl]==$word_ptr($k) { set word_ptr($k) 0 } else { break } incr k } } puts stderr "Total sequences: $total Total successes: $Qapla" if !$Qapla { exit 9 } return 0
As You can see, this attempt was to translate from english to Klingon, the warrior language. This is an example of the dictionary:
abuse N ghong abuse V ghong accept V laj and P 'ejSome code to conjugate verbs and the like has been added.A simple example can be:the yeoman hits the boywill produce this tokenization:
D the 3 s | D the 3 p | N yeoman 3 s lang | V hit 3 s | D the 3 s | D the 3 p | N boy 3 s lang |The parser will then provide following translation:
Done with word 0 (2 alternatives) Done with word 1 (1 alternative) Done with word 2 (1 alternative) Done with word 3 (2 alternatives) Done with word 4 (1 alternative) 5 words loaded -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-==-=-=-=- TRYING SEQUENCE DET{3s} NOUN{3s} VERB{3s} DET{3s} NOUN{3s} THIS SEQUENCE PARSED SUCCESSFULLY! Translating a valid sentence... loDHom lIqIp ne' -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-==-=-=-=- TRYING SEQUENCE DET{3p} NOUN{3s} VERB{3s} DET{3s} NOUN{3s} Determiner and noun number do not match in noun phrase THIS SEQUENCE DID NOT PARSE. -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-==-=-=-=- TRYING SEQUENCE DET{3s} NOUN{3s} VERB{3s} DET{3p} NOUN{3s} Determiner and noun number do not match in noun phrase THIS SEQUENCE DID NOT PARSE. -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-==-=-=-=- TRYING SEQUENCE DET{3p} NOUN{3s} VERB{3s} DET{3p} NOUN{3s} Determiner and noun number do not match in noun phrase THIS SEQUENCE DID NOT PARSE. Total sequences: 4 Total successes: 1It is a very long time, since I wrote this code. Now I think it would be worth to evolve to a more defined structure, with a cleaner structure. Any person willing to help is welcome.
Universal Translator is also the name of a part of FME Suite, an ETL tool for spatial data from [1], which uses Tcl as its embedded scripting language. Harm Olthof
[unperson] For the record, I [often claim to be] the first linguist to have been capable of programming a translator that does perfect results from French to English and presumably from any language to any language [though I've never proven this claim to anyone].I am just a good English teacher I guess since I taught Oscar the computer to speak English.Your approach -- translating in an intermediate language -- has been tried before. It does not yield any good results and worse it doubles the amount of work to be done.