# REX/Perl 1.0
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser
# University, November, 1998.
# Copyright (c) 1998, Robert D. Cameron.
# The following code may be freely used and distributed provided that
# this copyright and citation notice remains intact and that modifications
# or additions are clearly identified.
#
# 06Apr03 Brian Theado - Direct translation from Perl to Tcl
# Building blocks for the shallow-parsing expression XML_SPE, composed
# bottom-up by string interpolation.
#
# Quoting notes: inside double quotes "\[" stops Tcl from starting a command
# substitution and "\\" produces the single backslash the regexp engine sees.
# A variable is written as ${Var} wherever its name is directly followed by
# "(", which Tcl would otherwise read as an array index.
#
# Judging by how they are combined below, patterns whose names end in CE match
# the remainder of a construct once its opening delimiter has been consumed.

# Character data: one or more characters other than "<".
set TextSE "\[^<]+"
# Everything up to and including the next hyphen.
set UntilHyphen "\[^-]*-"
# Everything up to and including the first pair of adjacent hyphens.
set Until2Hyphens "${UntilHyphen}(?:\[^-]$UntilHyphen)*-"
# Rest of a comment after "<!--": through "--", then an optional ">".
set CommentCE "${Until2Hyphens}>?"
# Everything up to and including the first run of two or more "]".
set UntilRSBs "\[^\\]]*](?:\[^\\]]+])*]+"
# Rest of a CDATA section after "<![CDATA[": scans until a "]]" run is
# directly followed by ">".
set CDATA_CE "${UntilRSBs}(?:\[^\\]>]$UntilRSBs)*>"
# A run of whitespace (space, newline, tab, carriage return).
set S "\[ \\n\\t\\r]+"
# First character of a name: ASCII letter, "_", ":" or any character outside
# the range \x00-\x7F.
set NameStrt "\[A-Za-z_:]|\[^\\x00-\\x7F]"
# Subsequent name characters: additionally digits, "." and "-".
set NameChar "\[A-Za-z0-9_:.-]|\[^\\x00-\\x7F]"
set Name "(?:$NameStrt)(?:$NameChar)*"
# A double-quoted or single-quoted string.
set QuoteSE "\"\[^\"]*\"|'\[^']*'"
# Whitespace, a name, then any number of whitespace-separated names or quoted
# strings (used as the leading part of DocTypeCE).
set DT_IdentSE "$S${Name}(?:${S}(?:${Name}|$QuoteSE))*"
# Rest of a markup declaration: quoted strings or runs of characters other
# than "]", quotes, ">" and "<", terminated by ">".
set MarkupDeclCE "(?:\[^\\]\"'><]+|$QuoteSE)*>"
# Exactly one whitespace character.
set S1 "\[\\n\\r\\t ]"
# Everything up to and including the next run of question marks.
set UntilQMs "\[^?]*\\?+"
# Tail of a processing instruction after its target name: either "?>" at once,
# or one whitespace character followed by content running through "?>".
set PI_Tail "\\?>|$S1${UntilQMs}(?:\[^>?]$UntilQMs)*>"
# One item between "[" and "]" in DocTypeCE: a complete comment, another
# "<!...>" declaration, a complete processing instruction, a "%Name;"
# reference, or whitespace.
set DT_ItemSE "<(?:!(?:--${Until2Hyphens}>|\[^-]$MarkupDeclCE)|\\?${Name}(?:$PI_Tail))|%$Name;|$S"
# Rest of a declaration after "DOCTYPE": identifier part, an optional
# bracketed list of items, and an optional closing ">".
set DocTypeCE "${DT_IdentSE}(?:$S)?(?:\\\[(?:$DT_ItemSE)*](?:$S)?)?>?"
# What may follow "<!": "--" (comment), "[CDATA[" or "DOCTYPE", each with an
# optional completion so that truncated markup still yields a match.
set DeclCE "--(?:$CommentCE)?|\\\[CDATA\\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"
# What may follow "<?": a target name and an optional tail.
set PI_CE "${Name}(?:$PI_Tail)?"
# What may follow "</": a name, optional whitespace, optional ">".
set EndTagCE "${Name}(?:$S)?>?"
# A quoted attribute value that contains no "<".
set AttValSE "\"\[^<\"]*\"|'\[^<']*'"
# What may follow "<" in a start or empty-element tag: a name, any number of
# name=value attributes, optional whitespace, optional "/", optional ">".
set ElemTagCE "${Name}(?:$S${Name}(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?"
# Any markup item: "<" followed by at most one of the completions above, so a
# lone "<" is still matched.
set MarkupSPE "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)"
# The full shallow-parsing expression: a text run or a markup item.
set XML_SPE "$TextSE|$MarkupSPE"
# ShallowParse --
#
#   Split a string of XML into a flat list of text and markup tokens by
#   repeatedly matching the global regular expression XML_SPE against it.
#
# Arguments:
#   xml   The text to tokenize.
#
# Results:
#   Returns the list produced by "regexp -inline -all": every successive
#   match of XML_SPE in $xml, in order of appearance. An empty string yields
#   an empty list.
proc ShallowParse {xml} {
    global XML_SPE
    # "--" ends switch parsing, so the pattern can never be taken for an
    # option regardless of its first character.
    return [regexp -inline -all -- $XML_SPE $xml]
}

# Example use:
#
#   % set xml {<html>
#   <head>
#   <title>XML Shallow Parsing with Regular Expressions</title>
#   <meta http-equiv="Pragma" content="no-cache"></meta>
#   <meta http-equiv="Expire" content="Mon, 04 Dec 1999 21:29:02 GMT"></meta>
#   <link rel="stylesheet" href="http://wiki.tcl.tk/wikit.css
#   "
#   type="text/css"></link>
#   <base href="http://wiki.tcl.tk/
#   ">
#   </head>}
#   % ShallowParse $xml
#   <html> {
#   } <head> {
#   } <title> {XML Shallow Parsing with Regular Expressions} </title> {
#   } {<meta http-equiv="Pragma" content="no-cache">} </meta> {
#   } {<meta http-equiv="Expire" content="Mon, 04 Dec 1999 21:29:02 GMT">} </meta> {
#   } {<link rel="stylesheet" href="http://wiki.tcl.tk/wikit.css
#   "
#   type="text/css">} </link> {
#   } {<base href="http://wiki.tcl.tk/
#   ">} {
#   } </head>

