Updated 2008-03-17 13:54:04 by MJ

MJ - For a nice overview of WBXML read [1].

A WBXML parser demonstrating the use of XOTcl mixins is shown below:
 package require dict
 package require XOTcl
 namespace import ::xotcl::*

 Class Wbxml

 Wbxml instproc init { raw_wbxml } {
   my instvar raw tokens
   set raw [string toupper $raw_wbxml]
   set tokens [dict create]
 }

 Wbxml instproc parse {} {
   my instvar codepage cache raw strtbl xml
   set cache $raw
   set xml {}
   # reset codepages to 0
   dict set codepage A 0
   dict set codepage E 0
   # puts "Parsing $raw"
   my unpack_version
   my unpack_publicid
   my unpack_charset
   my unpack_strtbl
   # assume no processing instructions in body so body is element
   my unpack_element
 }

 Wbxml instproc tagid_to_s {type tagid} {
   # by default we don't know any tags
   my instvar codepage tokens
   if {[dict exists $tokens $type [dict get $codepage $type] $tagid]} {
     return [dict get $tokens $type [dict get $codepage $type] $tagid]
   } else {
     # use default tag rendering
     return "${type}TOKEN_[dict get $codepage $type]_$tagid"
   }

 }

 Wbxml instproc unpack_element {} {
   my instvar cache codepage xml

   set tag [string range $cache 0 1]
   set cache [string range $cache 2 end]

   # check for codepage switch
   if {$tag eq "00"} {
     set newcodepage [scan [string range $cache 0 1] "%xx"]
     dict set codepage E $newcodepage
     set cache [string range $cache 2 end]
     return
   }

   scan $tag "%xx" tag_dec
   set attributes [expr ($tag_dec & 0x80)!=0]
   set contents [expr ($tag_dec & 0x40)!=0]
   set tagid [expr $tag_dec & 0x3F]
   set tagid [format "%X" $tagid]
   set tagtxt [my tagid_to_s E $tagid]
   set xml "${xml}<${tagtxt}"
   if $attributes {
     while {[string range $cache 0 1] ne "01"} {
       my unpack_attribute
     }
     set cache [string range $cache 2 end]
   }
   if $contents {
     set xml "${xml}>\n"
     while {[string range $cache 0 1] ne "01"} {
       my unpack_content
     }
     if {[string range $cache 0 1] ne "01" } {
       error "expected END token for tag $tagtxt, found [string range $cache 0 1] instead\n$cache"
     } else {
       set cache [string range $cache 2 end]
       set xml "${xml}</$tagtxt>\n"
     }
   } else { set xml "${xml}/>\n" }
 }
 Wbxml instproc unpack_charset {} {
   my instvar charset cache
   set raw_charset [string range $cache 0 1]
   set cache [string range $cache 2 end]
   set charsets [IANACharsets new -childof [self]]
   set charset [$charsets to_s $raw_charset]
 }

 Wbxml instproc unpack_content {} {
   my unpack_element
 }

 Wbxml instproc unpack_attribute {} {
   my instvar cache codepage xml
   set attrStart [string range $cache 0 1]
   set cache [string range $cache 2 end]

   # check for codepage switch
   if {$attrStart eq "00"} {
     set newcodepage [scan [string range $cache 0 1] "%xx"]
     dict set codepage A $newcodepage
     set cache [string range $cache 2 end]
     return
   }

   scan $attrStart "%xx" attrStart
   set attrStart [format "%X" $attrStart]
   set attribute [my tagid_to_s A $attrStart]
   # is string following?
   set next_token [format "%X" [scan [string range $cache 0 1] "%xx"]]

   if {$next_token == 3} {
     set cache [string range $cache 2 end]
     set attribute "$attribute\"[my unpack_string]\""
   } elseif { [scan $next_token "%xx"] > 128 } {
     # well known attribute value
     set cache [string range $cache 2 end]
     set attribute "$attribute\"[my tagid_to_s A $next_token]\""
   }
   set xml "$xml $attribute"
 }

 Wbxml instproc unpack_string {} {
   my instvar cache

   set result ""
   while {[string range $cache 0 1] ne "00"} {
     set result $result[format "%c" [scan [string range $cache 0 1] "%xx"]]
     set cache [string range $cache 2 end]
   }
   set cache [string range $cache 2 end]
   return $result
 }

 Wbxml instproc unpack_strtbl {} {
   # assume empty strtbl
   my instvar strtbl cache
   set cache [string range $cache 2 end]
   set strtbl {}
 }

 Wbxml instproc unpack_version {} {
   my instvar cache version
   set raw_version [string range $cache 0 1]
   set cache [string range $cache 2 end]
   set major [string index $raw_version 0]
   incr major
   set minor [string index $raw_version 1]
   dict set version major $major
   dict set version minor $minor
 }

 Wbxml instproc unpack_publicid {} {
   my instvar cache publicid
   set raw_publicid [string range $cache 0 1]
   set cache [string range $cache 2 end]
   # only support OMA Provisioning Documents for now TODO
   dict set publicid raw $raw_publicid
   set tmp [DocumentPublicIdentifier new -childof [self]]
   dict set publicid txt [$tmp pid_to_s $raw_publicid]

   # mixin specific functionality for the current
   my mixin [$tmp encoder_class $raw_publicid]

   #add tokens
   my add_tokens
 }

 Wbxml instproc display {} {
   my instvar version publicid strtbl cache charset xml
   puts "WBXML Version: [dict get $version major].[dict get $version minor]"
   puts "Document Public Id: [dict get $publicid txt] ([dict get $publicid raw])"
   # puts "Defined tokens [my tokens]"
   puts "Document charset: $charset"
   puts "String table: $strtbl"
   puts "Unparsed octets in last parse: $cache"
   puts "Parsed result:\n$xml"
 }

 # http://developer.sonyericsson.com/getDocument.do?docId=65055
 Class WbxmlWapOta

 WbxmlWapOta instproc add_tokens {} {
   my instvar tokens
   dict set tokens E 0 5   characteristic-list
   dict set tokens E 0 6   characteristic
   dict set tokens E 0 7   parm

   dict set tokens A 0 6   {type="ADDRESS"}
   dict set tokens A 0 7   {type="URL"}
   dict set tokens A 0 8   {type="NAME"}
   dict set tokens A 0 11  {value=}
   dict set tokens A 0 12  {name="BEARER"}
   dict set tokens A 0 13  {name="PROXY"}
   dict set tokens A 0 14  {name="PORT"}
   dict set tokens A 0 15  {name="NAME"}
   dict set tokens A 0 17  {name="URL"}
   dict set tokens A 0 1C  {name="GPRS_ACCESSPOINTNAME"}
   dict set tokens A 0 21  {name="CSD_DIALSTRING"}
   dict set tokens A 0 28  {name="CSD_CALLTYPE"}
   dict set tokens A 0 29  {name="CSD_CALLSPEED"}
   dict set tokens A 0 22  {name="PPP_AUTHTYPE"}
   dict set tokens A 0 23  {name="PPP_AUTHNAME"}
   dict set tokens A 0 24  {name="PPP_AUTHSECRET"}
   dict set tokens A 0 45  {value="GSM/CSD"}
   dict set tokens A 0 49  {value="GPRS"}
   dict set tokens A 0 61  {value="9201"}
   dict set tokens A 0 6B  {value="9600"}
   dict set tokens A 0 70  {value="PAP"}
   dict set tokens A 0 73  {value="ISDN"}
   dict set tokens A 0 7C  {type="MMSURL"}
   dict set tokens A 0 7F  {value="BOOKMARK"}

 }

 WbxmlWapOta instproc display {} {
   my instvar raw
   puts "WAP OTA Provisioning Document"
   next
 }

 Class WbxmlOmaProv

 # microsoft specific:
 # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/mobilesdk5/html/wce51congsmsmartphonepocketpcphoneeditionprovisioningwbxml.asp

 WbxmlOmaProv instproc add_tokens {} {
   my instvar tokens
   dict set tokens E 0 5  wap-provisioningdoc
   dict set tokens E 0 6  characteristic
   dict set tokens E 0 7  parm

   dict set tokens E 1 6  characteristic
   dict set tokens E 1 7  parm

   dict set tokens A 0 5  {name=}
   dict set tokens A 0 6  {value=}
   dict set tokens A 0 7  {name="NAME"}
   dict set tokens A 0 8  {name="NAP-ADDRESS"}
   dict set tokens A 0 9  {name="NAP-ADDRTYPE"}
   dict set tokens A 0 A  {name="CALLTYPE"}
   dict set tokens A 0 C  {name="AUTHTYPE"}
   dict set tokens A 0 D  {name="AUTHNAME"}
   dict set tokens A 0 E  {name="AUTHSECRET"}
   dict set tokens A 0 F  {name="LINGER"}
   dict set tokens A 0 10 {name="BEARER"}
   dict set tokens A 0 11 {name="NAPID"}
   dict set tokens A 0 15 {name="PROXY-ID"}
   dict set tokens A 0 18 {name="PROVURL"}
   dict set tokens A 0 1C {name="STARTPAGE"}
   dict set tokens A 0 20 {name="PXADDR"}
   dict set tokens A 0 21 {name="PXADDRTYPE"}
   dict set tokens A 0 22 {name="TO-NAPID"}
   dict set tokens A 0 24 {name="SERVICE"}
   dict set tokens A 0 2F {name="PHYSICAL-PROXY-ID"}
   dict set tokens A 0 23 {name="PORTNBR"}
   dict set tokens A 0 46 {version="1.0"}
   dict set tokens A 0 50 {type=}
   dict set tokens A 0 51 {type="PXLOGICAL"}
   dict set tokens A 0 52 {type="PXPHYSICAL"}
   dict set tokens A 0 53 {type="PORT"}
   dict set tokens A 0 55 {type="NAPDEF"}
   dict set tokens A 0 56 {type="BOOTSTRAP"}
   dict set tokens A 0 5A {type="NAPAUTHINFO"}
   dict set tokens A 0 85 "IPV4"
   dict set tokens A 0 89 "APN"
   dict set tokens A 0 9A "PAP"
   dict set tokens A 0 AA "GSM-CSD"
   dict set tokens A 0 AB "GSM-GPRS"
   dict set tokens A 0 CB "CO-WSP"

   dict set tokens A 1 6  {value=}
   dict set tokens A 1 7  {name="NAME"}

   dict set tokens A 1 16 {type="CM_GPRSEntries"} ; #MS
   dict set tokens A 1 17 {type="DevSpecificCellular"} ; #MS

   dict set tokens A 1 1C {name="STARTPAGE"}
   dict set tokens A 1 22 {name="TO-NAPID"}
   dict set tokens A 1 28 {name="DestId"} ; #MS
   dict set tokens A 1 2A {name="UserName"} ; #MS
   dict set tokens A 1 2B {name="Password"} ; #MS
   dict set tokens A 1 3A {name="URI"}
   dict set tokens A 1 34 {type="ADDR"}
   dict set tokens A 1 36 {type="APPID"}
   dict set tokens A 1 39 {type="TO-PROXY"}
   dict set tokens A 1 55 {type="APPLICATION"}
   dict set tokens A 1 59 {type="RESOURCE"}
   dict set tokens A 1 61 {name="GPRSInfoAccessPointName"} ; #MS

   dict set tokens A 2 5 {type="BrowserFavorite"} ; #MS
   dict set tokens A 2 6 {name="URL"} ; #MS

   dict set tokens A 3 26 {name="REPLYADDR"} ; #MS
 }

 WbxmlOmaProv instproc display {} {
   puts "OMA Provisioning Document"
   next
 }

 # http://www.iana.org/assignments/character-sets
 Class IANACharsets

 IANACharsets instproc init {} {
     my instvar charsets
     dict set charsets 106 "UTF-8"
 }

 IANACharsets instproc to_s {id} {
   my instvar charsets
   scan $id "%xx" dec_id
   return [dict get $charsets $dec_id]
 }

 # http://www.wapforum.org/WINA
 Class DocumentPublicIdentifier

 DocumentPublicIdentifier instproc init {} {
   my instvar pids
   dict set pids 01 {{unknown (assuming WAP OTA v70)} WbxmlWapOta}
   dict set pids 0B { "-//WAPFORUM//DTD PROV 1.0//EN (Provisioning 1.0)" WbxmlOmaProv }
 }

 DocumentPublicIdentifier instproc pid_to_s { pid } {
   my instvar pids
   return [lindex [dict get $pids $pid] 0]
 }

 DocumentPublicIdentifier instproc encoder_class { pid } {
   my instvar pids
   return [lindex [dict get $pids $pid] 1]
 }

 package provide wbxml 0.1