18 static char vcid[] =
"$Id: xmlparser.c,v 1.3 2004/05/04 00:00:17 awb Exp $";
/*
 * Allocator header, the Realloc alias (Realloc expands to srealloc), and
 * forward declarations for the file-local (static) routines of this parser.
 *
 * NOTE(review): in this listing the #include, the #define and the first
 * prototype share one physical line, and every statement is preceded by a
 * bare number (apparently its line number in the upstream file).  As
 * written this does not compile; confirm against the upstream source.
 */
31 #include "lt-memory.h" 35 #define Realloc srealloc 52 static int transcribe(Parser p,
int back,
int count);
53 static void pop_while_at_eoe(Parser p);
54 static void maybe_uppercase(Parser p, Char *s);
55 static void maybe_uppercase_name(Parser p);
56 static int str_maybecase_cmp8(Parser p,
const char8 *a,
const char8 *b);
57 static int is_ascii_alpha(
int c);
58 static int is_ascii_digit(
int c);
/*
 * NOTE(review): the prototype below is cut off -- its parameter list is
 * never closed before the parse_conditional prototype starts.  Call sites
 * further down pass six arguments, e.g.
 * parse_external_id(p, 1, &publicid, &systemid, 1, 1), so four parameters
 * are missing here.  Restore them from the upstream source.
 */
59 static int parse_external_id(Parser p,
int required,
62 static int parse_conditional(Parser p);
63 static int parse_notation_decl(Parser p);
64 static int parse_entity_decl(Parser p, Entity ent,
int line,
int chpos);
65 static int parse_attlist_decl(Parser p);
66 static int parse_element_decl(Parser p);
/* Helpers that build, check, print or measure ContentParticle values. */
67 static ContentParticle parse_cp(Parser p);
68 static ContentParticle parse_choice_or_seq(Parser p);
69 static ContentParticle parse_choice_or_seq_1(Parser p,
int nchildren,
char sep);
70 static int check_content_decl(Parser p, ContentParticle cp);
71 static int check_content_decl_1(Parser p, ContentParticle cp);
72 static Char *stringify_cp(ContentParticle cp);
73 static void print_cp(ContentParticle cp, FILE16 *
f);
74 static int size_cp(ContentParticle cp);
76 static int parse_reference(Parser p,
int pe,
int expand,
int allow_external);
77 static int parse_character_reference(Parser p,
int expand);
78 static const char8 *escape(
int c);
79 static int parse_name(Parser p,
const char8 *where);
80 static int parse_nmtoken(Parser p,
const char8 *where);
81 static int looking_at(Parser p,
const char8 *
string);
82 static void clear_xbit(XBit
xbit);
83 static int expect(Parser p,
int expected,
const char8 *where);
84 static int expect_dtd_whitespace(Parser p,
const char8 *where);
85 static void skip_whitespace(InputSource s);
86 static int skip_dtd_whitespace(Parser p,
int allow_pe);
87 static int parse_cdata(Parser p);
88 static int process_nsl_decl(Parser p);
89 static int process_xml_decl(Parser p);
90 static int parse_dtd(Parser p);
91 static int read_markupdecls(Parser p);
/*
 * Diagnostics.  error() returns int, which is why call sites can write
 * "return error(p, ...)" directly; warn() and verror() return nothing.
 * error() and warn() are variadic; verror() takes an already-started
 * va_list.
 */
92 static int error(Parser p,
const char8 *format, ...);
93 static void warn(Parser p,
const char8 *format, ...);
94 static void verror(XBit bit,
const char8 *format, va_list args);
97 static int parse_pi(Parser p);
98 static int parse_comment(Parser p,
int skip);
99 static int parse_pcdata(Parser p);
100 static int parse_starttag(Parser p);
101 static int parse_attribute(Parser p);
102 static int parse_endtag(Parser p);
103 static int parse_markup(Parser p);
104 static int parse(Parser p);
105 static int parse_markupdecl(Parser p);
/*
 * Control-flow and buffer helper macros, followed by module-level state.
 *
 * require(x)         - does nothing when x >= 0; otherwise returns -1 from
 *                      the enclosing function.
 * require0(x)        - same test, but returns 0 on failure (used by the
 *                      routines that return a pointer).
 * Consume(buf)       - sets buf and its companion variable buf##size to 0.
 * ExpandBuf(buf, sz) - when buf##size is smaller than sz+1, resizes buf to
 *                      sz+1 Chars with Realloc and stores the new size in
 *                      buf##size (so p->pbuf pairs with p->pbufsize); if
 *                      Realloc yields 0 the enclosing function returns
 *                      error(p, "System error").
 * CopyName(n)        - allocates p->namelen+1 Chars into n, copies p->name
 *                      and 0-terminates it; on allocation failure returns
 *                      error(p, "System error").
 * CopyName0(n)       - like CopyName, but on failure calls error() and
 *                      returns 0.
 *
 * ExpandBuf, CopyName and CopyName0 use a variable named p that must be in
 * scope wherever they are expanded.
 *
 * NOTE(review):
 *  - x in require/require0 and sz in "buf##size = sz + 1" are expanded
 *    without parentheses, so arguments containing low-precedence operators
 *    would bind differently than the caller expects.
 *  - The expansions are bare if/else statements rather than
 *    do { } while (0) wrappers, and CopyName carries its own trailing ';'.
 *    Take care when using them as the body of an unbraced if/else.
 *  - ExpandBuf assigns Realloc's result directly to buf and enlarges
 *    buf##size before checking the result, so after a failure buf is 0 and
 *    the recorded size is already the larger one.  Whether the old block is
 *    leaked depends on srealloc, which is not visible here.
 *  - In this listing all six #defines and the first static declaration sit
 *    on one physical line with bare numbers in between (apparently upstream
 *    line numbers).  As written, everything after the first #define would
 *    become part of require's replacement text; confirm against the
 *    upstream source.
 *
 * Module state: xml_builtin_entity is compared against an entity's parent
 * when deciding whether a redefinition is reported, and
 * xml_predefined_entities is the list installed into
 * p->dtd->predefined_entities.
 */
107 #define require(x) if(x >= 0) {} else return -1 108 #define require0(x) if(x >= 0) {} else return 0 110 #define Consume(buf) (buf = 0, buf##size = 0) 111 #define ExpandBuf(buf, sz) \ 112 if(buf##size >= (sz)+1) {} else if((buf = Realloc(buf, (buf##size = sz + 1) * sizeof(Char)))) {} else return error(p, "System error") 114 #define CopyName(n) if((n = Malloc((p->namelen + 1)*sizeof(Char)))) {memcpy(n, p->name, p->namelen * sizeof(Char)); n[p->namelen] = 0;} else return error(p, "System error"); 116 #define CopyName0(n) if((n = Malloc((p->namelen + 1)*sizeof(Char)))) {memcpy(n, p->name, p->namelen * sizeof(Char)); n[p->namelen] = 0;} else {error(p, "System error"); return 0;} 134 static Entity xml_builtin_entity;
135 static Entity xml_predefined_entities;
/* Starts at 0; the code that tests or sets it is not visible in this listing. */
139 static int initialised = 0;
/*
 * Names and replacement texts of the five predefined XML entities, each a
 * 0-terminated Char array.  They are spelled out element by element,
 * presumably so the same source works when Char is wider than char
 * (TODO confirm against the Char typedef).
 *
 *   name   replacement text
 *   lt     "&#60;"  (a character reference, not a literal '<')
 *   gt     "&#62;"
 *   amp    "&#38;"
 *   apos   "'"
 *   quot   "\""
 *
 * These name/value pairs are what the builtins[5][2] table that follows
 * refers to.
 */
142 static const Char lt[] = {
'l',
't',0}, ltval[] = {
'&',
'#',
'6',
'0',
';',0};
143 static const Char gt[] = {
'g',
't',0}, gtval[] = {
'&',
'#',
'6',
'2',
';',0};
144 static const Char amp[] = {
'a',
'm',
'p',0},
145 ampval[] = {
'&',
'#',
'3',
'8',
';',0};
/* apos and quot expand to the literal quote characters themselves. */
146 static const Char apos[] = {
'a',
'p',
'o',
's',0}, aposval[] = {
'\'',0};
147 static const Char quot[] = {
'q',
'u',
'o',
't',0}, quotval[] = {
'"',0};
148 static const Char *builtins[5][2] = {
149 {lt, ltval}, {gt, gtval}, {amp, ampval},
150 {apos, aposval}, {quot, quotval}
162 for(i=0, f=0; i<5; i++, f=e)
165 xml_builtin_entity, 0, 0, 0);
171 xml_predefined_entities = e;
176 static void skip_whitespace(InputSource s)
191 static int skip_dtd_whitespace(Parser p,
int allow_pe)
195 InputSource s = p->source;
208 "PE end not allowed here in internal subset");
210 p->external_pe_depth--;
229 c =
get(s);
unget(s);
236 "PE ref not allowed here in internal subset");
238 require(parse_reference(p, 1, 1, 1));
241 p->external_pe_depth++;
258 static int expect(Parser p,
int expected,
const char8 *where)
261 InputSource s = p->source;
267 return error(p,
"Expected %s %s, but got %s",
268 escape(expected), where, escape(c));
278 static int expect_dtd_whitespace(Parser p,
const char8 *where)
280 int r = skip_dtd_whitespace(p, p->external_pe_depth > 0);
286 return error(p,
"Expected whitespace %s", where);
291 static void clear_xbit(XBit
xbit)
294 xbit->s1 = xbit->s2 = 0;
295 xbit->S1 = xbit->S2 = 0;
296 xbit->attributes = 0;
297 xbit->element_definition = 0;
304 if(xbit->S1)
Free(xbit->S1);
305 if(xbit->S2)
Free(xbit->S2);
308 if(xbit->s2)
Free(xbit->s2);
309 for(a = xbit->attributes; a; a = b)
312 if(a->value)
Free(a->value);
329 static int looking_at(Parser p,
const char8 *
string)
331 InputSource s = p->source;
335 for(c = *
string++; c; c = *
string++)
342 if(c ==
' ' && *
string == 0)
362 static int parse_name(Parser p,
const char8 *where)
364 InputSource s = p->source;
371 error(p,
"Expected name, but got %s %s", escape(c), where);
380 p->name = s->line + s->next - i;
386 static int parse_nmtoken(Parser p,
const char8 *where)
388 InputSource s = p->source;
396 return error(p,
"Expected nmtoken value, but got %s %s",
399 p->name = s->line + s->next - i;
408 static const char8 *escape(
int c)
410 static char8 buf[5][15];
411 static int bufnum=-1;
418 bufnum = (bufnum + 1) % 5;
422 else if(c >= 33 && c <= 126)
423 sprintf(buf[bufnum],
"%c", c);
425 sprintf(buf[bufnum],
"<space>");
427 sprintf(buf[bufnum],
"<0x%x>", c);
443 p->document_entity = 0;
448 clear_xbit(&p->xbit);
450 p->xbit.nchildren = 0;
451 p->xbit.children = 0;
454 p->pbufsize = p->pbufnext = 0;
458 p->dtd_callback = p->warning_callback = 0;
459 p->entity_opener = 0;
461 p->external_pe_depth = 0;
463 p->element_stack = 0;
464 p->element_stack_alloc = 0;
465 p->element_depth = 0;
494 Free(p->element_stack);
502 for(s=p->source; s && s->parent; s = s->parent)
515 p->callback_arg = arg;
520 p->dtd_callback = cb;
525 p->warning_callback = cb;
530 p->entity_opener = opener;
537 XBit bit,
tree, child;
548 if(!(tree =
Malloc(
sizeof(*tree))))
550 error(p,
"System error");
566 error(p,
"EOF in element");
571 if(child->element_definition != tree->element_definition)
573 const Char *name1 = tree->element_definition->name,
574 *name2 = child->element_definition->name;
577 error(p,
"Mismatched end tag: expected </%S>, got </%S>",
585 children =
Realloc(tree->children,
586 (tree->nchildren + 1) *
sizeof(XBit));
591 error(p,
"System error");
594 child->parent =
tree;
595 children[tree->nchildren] = child;
597 tree->children = children;
603 if(!(tree =
Malloc(
sizeof(*tree))))
605 error(p,
"System error");
617 for(i=0; i<tree->nchildren; i++)
620 Free(tree->children);
646 error(p,
"Attempt to peek twice");
658 if(!p->source && !p->document_entity)
659 p->document_entity = source->entity;
661 source->parent = p->source;
675 return error(p,
"Unsupported character encoding %s",
679 return error(p,
"Unknown character encoding");
685 if(looking_at(p,
"<?NSL "))
686 return process_nsl_decl(p);
687 if(looking_at(p,
"<?xml "))
690 if(source->entity == p->document_entity &&
691 !source->entity->version_decl)
692 return error(p,
"XML declaration in document entity lacked " 694 if(source->entity != p->document_entity &&
696 return error(p,
"Standalone attribute not allowed except in " 702 warn(p,
"Found <?XML instead of <?xml; switching to case-" 705 return process_xml_decl(p);
717 p->source = source->parent;
726 static int at_eoe(InputSource s)
739 static void pop_while_at_eoe(Parser p)
743 InputSource s = p->source;
759 p->flags |= (1 << flag);
761 p->flags &= ~(1 << flag);
766 p->dtd->predefined_entities = xml_predefined_entities;
768 p->dtd->predefined_entities = 0;
774 int linenum, charnum;
778 bit->type ==
XBIT_error ?
"Error" :
"Warning",
782 for(s=p->source; s; s=s->parent)
792 Fprintf(
Stderr,
" at line %d char %d of", linenum+1, charnum+1);
796 linenum+1, charnum+1);
808 static int parse(Parser p)
820 clear_xbit(&p->xbit);
823 skip_whitespace(p->source);
834 return error(p,
"Document ends too soon");
839 return parse_markup(p);
844 return error(p,
"Entity reference not allowed in prolog");
845 if(looking_at(p,
"#"))
854 require(parse_reference(p, 0, 1, 1));
862 return parse_pcdata(p);
868 static int parse_markup(Parser p)
870 InputSource s = p->source;
876 if(looking_at(p,
"--"))
879 return parse_comment(p, 0);
886 else if(looking_at(p,
"DOCTYPE "))
888 else if(looking_at(p,
"[CDATA["))
889 return parse_cdata(p);
891 return error(p,
"Syntax error after <!");
894 return parse_endtag(p);
907 return parse_pcdata(p);
909 return parse_starttag(p);
913 static int parse_endtag(Parser p)
915 ElementDefinition def;
919 require(parse_name(p,
"after </"));
920 maybe_uppercase_name(p);
924 if(p->element_depth <= 0)
925 return error(p,
"End tag </%.*S> outside of any element",
926 p->namelen, p->name);
928 ent = p->element_stack[--p->element_depth].entity;
929 def = p->element_stack[p->element_depth].definition;
931 if(p->namelen == def->namelen &&
932 memcmp(p->name, def->name, p->namelen *
sizeof(Char)) == 0)
933 p->xbit.element_definition = def;
935 return error(p,
"Mismatched end tag: expected </%S>, got </%.*S>",
936 def->name, p->namelen, p->name);
938 if(ent != p->source->entity)
939 return error(p,
"Element ends in different entity from that " 940 "in which it starts");
942 if(p->element_depth == 0)
947 p->xbit.element_definition =
FindElementN(p->dtd, p->name, p->namelen);
948 if(!p->xbit.element_definition)
949 return error(p,
"End tag for unknown element %.*S",
950 p->namelen, p->name);
953 skip_whitespace(p->source);
954 return expect(p,
'>',
"after name in end tag");
957 static int parse_starttag(Parser p)
962 return error(p,
"Document contains multiple elements");
966 require(parse_name(p,
"after <"));
967 maybe_uppercase_name(p);
969 p->xbit.element_definition =
FindElementN(p->dtd, p->name, p->namelen);
970 if(!p->xbit.element_definition || p->xbit.element_definition->tentative)
973 return error(p,
"Start tag for undeclared element %.*S",
974 p->namelen, p->name);
976 warn(p,
"Start tag for undeclared element %.*S; " 977 "declaring it to have content ANY",
978 p->namelen, p->name);
979 if(p->xbit.element_definition)
983 if(!(p->xbit.element_definition =
985 return error(p,
"System error");
991 InputSource s = p->source;
1006 return error(p,
"Expected whitespace or tag end in start tag");
1017 require(expect(p,
'>',
"after / in start tag"));
1031 if(p->element_depth == p->element_stack_alloc)
1033 p->element_stack_alloc =
1034 p->element_stack_alloc == 0 ? 20 :
1035 p->element_stack_alloc * 2;
1036 if(!(p->element_stack =
1038 (p->element_stack_alloc *
sizeof(*p->element_stack)))))
1039 return error(p,
"System error");
1041 p->element_stack[p->element_depth].definition =
1042 p->xbit.element_definition;
1043 p->element_stack[p->element_depth++].entity = p->source->entity;
1046 if(p->element_depth == 0)
1052 AttributeDefinition d;
1059 if(!d->default_value)
1061 for(a=p->xbit.attributes; a; a=a->next)
1062 if(a->definition == d)
1066 if(!(a =
Malloc(
sizeof(*a))))
1067 return error(p,
"System error");
1069 if(!(a->value =
Strdup(d->default_value)))
1070 return error(p,
"System error");
1072 a->next = p->xbit.attributes;
1073 p->xbit.attributes = a;
1081 static int parse_attribute(Parser p)
1083 InputSource s = p->source;
1084 AttributeDefinition def;
1088 require(parse_name(p,
"for attribute"));
1089 maybe_uppercase_name(p);
1091 def =
FindAttributeN(p->xbit.element_definition, p->name, p->namelen);
1095 return error(p,
"Undeclared attribute %.*S for element %S",
1096 p->namelen, p->name, p->xbit.element_definition->name);
1098 warn(p,
"Undeclared attribute %.*S for element %S; " 1099 "declaring it as CDATA #IMPLIED",
1100 p->namelen, p->name, p->xbit.element_definition->name);
1102 p->name, p->namelen,
1104 return error(p,
"System error");
1107 for(a = p->xbit.attributes; a; a = a->
next)
1109 return error(p,
"Repeated attribute %.*S", p->namelen, p->name);
1111 if(!(a =
Malloc(
sizeof(*a))))
1112 return error(p,
"System error");
1115 a->
next = p->xbit.attributes;
1116 p->xbit.attributes = a;
1120 require(expect(p,
'=',
"after attribute name"));
1130 require(parse_string(p,
"in attribute value",
1138 return error(p,
"Value of attribute is unquoted");
1140 require(parse_nmtoken(p,
"in unquoted attribute value"));
1148 static int transcribe(Parser p,
int back,
int count)
1150 ExpandBuf(p->pbuf, p->pbufnext + count);
1151 memcpy(p->pbuf + p->pbufnext,
1152 p->source->line + p->source->next - back,
1153 count *
sizeof(Char));
1154 p->pbufnext += count;
1160 static int parse_pcdata(Parser p)
1168 return error(p,
"Character data not allowed in prolog");
1170 return error(p,
"Character data not allowed after body");
1175 buflen = s->line_length;
1186 require(transcribe(p, count, count));
1194 pop_while_at_eoe(p);
1199 buflen = s->line_length;
1212 if(buf[next] !=
'!' && buf[next] !=
'/' && buf[next] !=
'?' &&
1219 require(transcribe(p, count+1, count));
1223 buflen >= next + 3 &&
1224 buf[next] ==
'!' && buf[next+1] ==
'-' && buf[next+2] ==
'-')
1228 buflen = s->line_length;
1241 (p->pbufnext > 0 || count > 0))
1250 require(transcribe(p, count, count));
1254 if(buflen >= next+1 && buf[next] ==
'#')
1261 require(transcribe(p, count+2, count));
1264 require(parse_character_reference(p,
1278 require(transcribe(p, count+1, count));
1286 buflen = s->line_length;
1295 buflen >= next + 2 &&
1296 buf[next] ==
']' && buf[next+1] ==
'>')
1297 return error(p,
"Illegal character sequence ']]>' in pcdata");
1307 p->pbuf[p->pbufnext++] = 0;
1309 p->xbit.pcdata_chars = p->pbuf;
1317 static int parse_comment(Parser p,
int skip)
1319 InputSource s = p->source;
1326 while((c =
get(s)) !=
XEOE)
1329 if(c1 ==
'-' && c2 ==
'-')
1334 return error(p,
"-- in comment");
1341 require(transcribe(p, count, count));
1349 return error(p,
"EOE in comment");
1354 require(transcribe(p, count, count-3));
1355 p->pbuf[p->pbufnext++] = 0;
1357 p->xbit.comment_chars = p->pbuf;
1363 static int parse_pi(Parser p)
1365 InputSource s = p->source;
1368 Char xml[] = {
'x',
'm',
'l', 0};
1370 require(parse_name(p,
"after <?"));
1378 return error(p,
"Misplaced or wrong-case xml declaration");
1380 warn(p,
"Misplaced or wrong-case xml declaration; treating as PI");
1395 return error(p,
"Expected whitespace after PI name");
1398 while((c =
get(s)) !=
XEOE)
1406 require(transcribe(p, count, count));
1413 return error(p,
"EOE in PI");
1417 p->pbuf[p->pbufnext++] = 0;
1419 p->xbit.pi_chars = p->pbuf;
1429 InputSource start_source, s;
1431 s = start_source = p->source;
1434 if(quote !=
'\'' && quote !=
'"')
1437 return error(p,
"Expected quoted string %s, but got %s",
1438 where, escape(quote));
1458 require(transcribe(p, count+1, count));
1462 p->pbuf[p->pbufnext++] =
' ';
1468 return error(p,
"Illegal character '<' %s", where);
1473 if(s == start_source)
1475 return error(p,
"Quoted string goes past entity end");
1479 require(transcribe(p, count, count));
1494 require(transcribe(p, count+1, count));
1497 if(p->external_pe_depth == 0)
1500 return error(p,
"PE ref not allowed here in internal subset");
1502 require(parse_reference(p, 1, 1, 1));
1517 require(transcribe(p, count+1, count));
1520 if(looking_at(p,
"#"))
1521 require(parse_character_reference(p,
1535 if(c == quote && p->source == start_source)
1540 if(
at_eol(s) && count > 0)
1542 require(transcribe(p, count, count));
1549 require(transcribe(p, count+1, count));
1552 p->pbuf[p->pbufnext++] = 0;
1558 new = old = p->pbuf;
1583 if(
new > p->pbuf &&
new[-1] ==
' ')
1592 static int parse_dtd(Parser p)
1594 InputSource s = p->source;
1595 Entity
parent = s->entity;
1596 Entity internal_part = 0, external_part = 0;
1598 char8 *publicid = 0, *systemid = 0;
1604 require(parse_name(p,
"for name in dtd"));
1606 maybe_uppercase(p, name);
1609 if ( parse_external_id(p, 0, &publicid, &systemid,
1616 if(systemid || publicid)
1622 return error(p,
"System error");
1627 if(looking_at(p,
"["))
1629 int line = s->line_number, cpos = s->next;
1631 if (read_markupdecls(p) < 0) {
1642 return error(p,
"System error");
1646 require(expect(p,
'>',
"at end of dtd"));
1657 return error(p,
"Misplaced or repeated DOCTYPE declaration");
1659 warn(p,
"Misplaced or repeated DOCTYPE declaration");
1674 p->dtd->name = name;
1675 p->dtd->internal_part = internal_part;
1676 p->dtd->external_part = external_part;
1686 if(external_part && p->standalone !=
SDD_yes)
1698 static int read_markupdecls(Parser p)
1700 InputSource s = p->source;
1702 int c, d, hyphens=0;
1711 return error(p,
"EOE in DTD");
1725 require(transcribe(p, count+1, count));
1726 p->pbuf[p->pbufnext++] = 0;
1737 while((d =
get(s)) !=
XEOE)
1742 require(transcribe(p, count, count));
1749 return error(p,
"EOE in DTD");
1756 while((d =
get(s)) !=
XEOE)
1761 require(transcribe(p, count, count));
1772 return error(p,
"EOE in DTD");
1780 if(
at_eol(s) && count > 0)
1782 require(transcribe(p, count, count));
1788 static int process_nsl_decl(Parser p)
1790 InputSource s = p->source;
1793 s->entity->ml_decl =
ML_nsl;
1796 if(s->entity->encoding ==
CE_UTF_8)
1801 if(!looking_at(p,
"DDB "))
1802 return error(p,
"Expected \"DDB\" in NSL declaration");
1808 return error(p,
"EOE in NSL declaration");
1811 return error(p,
"Syntax error in NSL declaration");
1818 require(transcribe(p, count+1, count));
1819 p->pbuf[p->pbufnext++] = 0;
1822 if(!looking_at(p,
"0>"))
1823 return error(p,
"Expected \"0>\" at end of NSL declaration");
1826 return error(p,
"System error");
1831 static int process_xml_decl(Parser p)
1833 InputSource s = p->source;
1834 enum {None, V, E, S} which, last = None;
1840 s->entity->ml_decl =
ML_xml;
1844 while(!looking_at(p,
"?>"))
1846 if(looking_at(p,
"version"))
1848 else if(looking_at(p,
"encoding"))
1850 else if(looking_at(p,
"standalone"))
1853 return error(p,
"Expected \"version\", \"encoding\" or " 1854 "\"standalone\" in XML declaration");
1859 return error(p,
"Repeated or misordered attributes " 1860 "in XML declaration");
1861 warn(p,
"Repeated or misordered attributes in XML declaration");
1866 require(expect(p,
'=',
"after attribute name in XML declaration"));
1869 require(parse_string(p,
"for attribute value in XML declaration",
1872 maybe_uppercase(p, p->pbuf);
1877 if(!is_ascii_alpha(Value[0]))
1878 return error(p,
"Encoding name does not begin with letter");
1879 for(cp=Value+1; *cp; cp++)
1880 if(!is_ascii_alpha(*cp) && !is_ascii_digit(*cp) &&
1881 *cp !=
'.' && *cp !=
'_' && *cp !=
'-')
1882 return error(p,
"Illegal character %s in encoding name",
1889 return error(p,
"Unknown declared encoding %s", value);
1897 p->source->entity->encoding = enc;
1901 return error(p,
"Declared encoding %s is incompatible with %s " 1902 "which was used to read it",
1906 s->entity->encoding_decl = enc;
1913 if(str_maybecase_cmp8(p, value,
"no") == 0)
1915 else if(str_maybecase_cmp8(p, value,
"yes") == 0)
1918 return error(p,
"Expected \"yes\" or \"no\" " 1919 "for standalone in XML declaration");
1921 s->entity->standalone_decl = p->standalone;
1926 for(cp=Value; *cp; cp++)
1927 if(!is_ascii_alpha(*cp) && !is_ascii_digit(*cp) &&
1928 *cp !=
'.' && *cp !=
'_' && *cp !=
'-' && *cp !=
':')
1929 return error(p,
"Illegal character %s in version number",
1932 if(!s->entity->version_decl)
1934 return error(p,
"System error");
1941 return error(p,
"Expected whitespace or \"?>\" after attribute " 1942 "in XML declaration");
1948 static int parse_cdata(Parser p)
1950 InputSource s = p->source;
1955 return error(p,
"Cdata section not allowed in prolog");
1957 return error(p,
"Cdata section not allowed after body");
1961 while((c =
get(s)) !=
XEOE)
1964 if(c ==
'>' && c1 ==
']' && c2 ==
']')
1968 require(transcribe(p, count, count));
1975 return error(p,
"EOE in CData section");
1977 require(transcribe(p, count, count-3));
1978 p->pbuf[p->pbufnext++] = 0;
1980 p->xbit.cdsect_chars = p->pbuf;
1991 source = p->entity_opener(e, p->callback_arg);
2007 p->external_pe_depth = (source->entity->type ==
ET_external);
2009 while(parse_markupdecl(p) == 0)
2012 p->external_pe_depth = 0;
2027 static int parse_markupdecl(Parser p)
2031 int cur_line, cur_char;
2035 return error(p,
"Attempt to continue reading DTD after error");
2037 clear_xbit(&p->xbit);
2039 require(skip_dtd_whitespace(p, 1));
2043 cur_ent = s->entity;
2044 cur_line = s->line_number;
2054 if(looking_at(p,
"!ELEMENT"))
2056 require(expect_dtd_whitespace(p,
"after ELEMENT"));
2057 return parse_element_decl(p);
2059 else if(looking_at(p,
"!ATTLIST"))
2061 require(expect_dtd_whitespace(p,
"after ATTLIST"));
2062 return parse_attlist_decl(p);
2064 else if(looking_at(p,
"!ENTITY"))
2066 require(expect_dtd_whitespace(p,
"after ENTITY"));
2067 return parse_entity_decl(p, cur_ent, cur_line, cur_char);
2069 else if(looking_at(p,
"!NOTATION"))
2071 require(expect_dtd_whitespace(p,
"after NOTATION"));
2072 return parse_notation_decl(p);
2074 else if(looking_at(p,
"!["))
2075 return parse_conditional(p);
2076 else if(looking_at(p,
"?"))
2080 p->dtd_callback(&p->xbit, p->callback_arg);
2085 else if(looking_at(p,
"!--"))
2091 p->dtd_callback(&p->xbit, p->callback_arg);
2097 return parse_comment(p, 1);
2100 return error(p,
"Syntax error after < in dtd");
2103 return error(p,
"Expected \"<\" in dtd, but got %s", escape(c));
2107 static int parse_reference(Parser p,
int pe,
int expand,
int allow_external)
2112 require(parse_name(p, pe ?
"for parameter entity" :
"for entity"));
2113 require(expect(p,
';',
"after entity name"));
2116 return transcribe(p, 1 + p->namelen + 1, 1 + p->namelen + 1);
2126 return error(p,
"Undefined%s entity %.*S",
2127 pe ?
" parameter" :
"" ,
2128 p->namelen > 50 ? 50 : p->namelen, p->name);
2130 warn(p,
"Undefined%s entity %.*S",
2131 pe ?
" parameter" :
"",
2132 p->namelen > 50 ? 50 : p->namelen, p->name);
2136 buf =
Malloc((5 + p->namelen + 1 + 1) *
sizeof(Char));
2138 return error(p,
"System error");
2140 *q++ =
'&'; *q++ =
'#'; *q++ =
'3'; *q++ =
'8'; *q++ =
';';
2141 for(i=0; i<p->namelen; i++)
2147 return error(p,
"System error");
2149 return error(p,
"System error");
2153 return error(p,
"Illegal reference to external entity");
2155 for(s = p->source; s; s = s->parent)
2157 return error(p,
"Recursive reference to entity \"%S\"", e->name);
2160 s = p->entity_opener(e, p->callback_arg);
2164 return error(p,
"Couldn't open entity %S, %s",
2172 static int parse_character_reference(Parser p,
int expand)
2174 InputSource s = p->source;
2177 unsigned int code = 0;
2178 Char *ch = s->line + s->next;
2180 if(looking_at(p,
"x"))
2186 while((c =
get(s)) !=
';')
2188 if((c >=
'0' && c <=
'9') ||
2189 (base == 16 && ((c >=
'A' && c <=
'F') ||
2190 (c >=
'a' && c <=
'f'))))
2196 "Illegal character %s in base-%d character reference",
2202 return transcribe(p, 2 + (base == 16) + count + 1,
2203 2 + (base == 16) + count + 1);
2208 if(c >=
'0' && c <=
'9')
2209 code = code * base + (c -
'0');
2210 else if(c >=
'A' && c <=
'F')
2211 code = code * base + 10 + (c -
'A');
2213 code = code * base + 10 + (c -
'a');
2220 return error(p,
"0x%x is not a valid 8-bit XML character", code);
2222 warn(p,
"0x%x is not a valid 8-bit XML character; ignored", code);
2229 return error(p,
"0x%x is not a valid UTF-16 XML character", code);
2231 warn(p,
"0x%x is not a valid UTF-16 XML character; ignored", code);
2242 p->pbuf[p->pbufnext++] = (code >> 10) + 0xd800;
2243 p->pbuf[p->pbufnext++] = (code & 0x3ff) + 0xdc00;
2250 p->pbuf[p->pbufnext++] = code;
2257 static int parse_element_decl(Parser p)
2261 ElementDefinition def;
2266 Char pcdata[] = {
'#',
'P',
'C',
'D',
'A',
'T',
'A',0};
2270 require(parse_name(p,
"for name in element declaration"));
2272 maybe_uppercase(p, name);
2274 if (expect_dtd_whitespace(p,
"after name in element declaration") <0) {
2279 if(looking_at(p,
"EMPTY"))
2284 else if(looking_at(p,
"ANY"))
2291 if(looking_at(p,
"("))
2294 if(!(cp = parse_cp(p)) ||
2295 check_content_decl(p, cp) < 0 ||
2296 !(content = stringify_cp(cp)))
2315 return error(p,
"Expected \"EMPTY\", \"ANY\", or \"(\" after name in " 2316 "element declaration");
2326 while((c =
get(p->source)) !=
'>')
2332 require(transcribe(p, count, count));
2333 if(!p->source->parent)
2334 return error(p,
"EOE in element declaration");
2340 require(transcribe(p, count+1, count));
2341 if(p->external_pe_depth == 0)
2345 "PE ref not allowed here in internal subset");
2347 require(parse_reference(p, 1, 1, 1));
2354 require(transcribe(p, count, count));
2361 require(transcribe(p, count, count));
2362 p->pbuf[p->pbufnext++] = 0;
2364 if(
Strstr(p->pbuf, pcdata))
2373 if (skip_dtd_whitespace(p, p->external_pe_depth > 0) < 0) {
2377 require(expect(p,
'>',
"at end of element declaration"));
2387 warn(p,
"Ignoring redeclaration of element %S", name);
2392 return error(p,
"System error");
2402 static ContentParticle parse_cp(Parser p)
2406 if(looking_at(p,
"("))
2408 if(!(cp = parse_choice_or_seq(p)))
2411 else if(looking_at(p,
"#PCDATA"))
2413 if(!(cp =
Malloc(
sizeof(*cp))))
2415 error(p,
"System error");
2423 if(parse_name(p,
"in content declaration") < 0)
2426 if(!(cp =
Malloc(
sizeof(*cp))))
2428 error(p,
"System error");
2436 if(looking_at(p,
"*"))
2437 cp->repetition =
'*';
2438 else if(looking_at(p,
"+"))
2439 cp->repetition =
'+';
2440 else if(looking_at(p,
"?"))
2441 cp->repetition =
'?';
2450 static ContentParticle parse_choice_or_seq(Parser p)
2452 ContentParticle cp, cp1;
2455 require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2457 if(!(cp1 = parse_cp(p)))
2460 require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2462 if(!(cp = parse_choice_or_seq_1(p, 1, 0)))
2465 cp->children[0] = cp1;
2472 static ContentParticle parse_choice_or_seq_1(Parser p,
int nchildren,
char sep)
2474 ContentParticle cp = 0, cp1;
2475 int nsep =
get(p->source);
2481 if(!(cp =
Malloc(
sizeof(*cp))) ||
2485 error(p,
"System error");
2499 if(nsep !=
'|' && nsep !=
',')
2501 error(p,
"Expected | or , or ) in content declaration, got %s",
2506 if(sep && nsep != sep)
2508 error(p,
"Content particle contains both | and ,");
2512 require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2514 if(!(cp1 = parse_cp(p)))
2517 require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2519 if(!(cp = parse_choice_or_seq_1(p,
nchildren+1, (
char)nsep)))
2529 static int check_content_decl(Parser p, ContentParticle cp)
2535 for(i=1; i<cp->nchildren; i++)
2536 if(cp->children[i]->type !=
CP_name)
2537 return error(p,
"Invalid mixed content declaration");
2539 if(cp->repetition !=
'*' &&
2540 !(cp->nchildren == 1 && cp->repetition == 0))
2541 return error(p,
"Invalid mixed content declaration");
2546 return check_content_decl_1(p, cp);
2549 static int check_content_decl_1(Parser p, ContentParticle cp)
2556 return error(p,
"Misplaced #PCDATA in content declaration");
2559 for(i=0; i<cp->nchildren; i++)
2560 if(check_content_decl_1(p, cp->children[i]) < 0)
2570 static Char *stringify_cp(ContentParticle cp)
2572 int size = size_cp(cp);
2576 if(!(s =
Malloc((size+1) *
sizeof(Char))) ||
2591 static void print_cp(ContentParticle cp, FILE16 *
f)
2606 for(i=0; i<cp->nchildren; i++)
2610 print_cp(cp->children[i], f);
2617 Fprintf(f,
"%c", cp->repetition);
2620 static int size_cp(ContentParticle cp)
2634 for(i=0; i<cp->nchildren; i++)
2638 s += size_cp(cp->children[i]);
2665 for(i=0; i<cp->nchildren; i++)
2676 static int parse_attlist_decl(Parser p)
2679 ElementDefinition element;
2682 Char **allowed_values, *t;
2683 Char *default_value;
2686 require(parse_name(p,
"for name in attlist declaration"));
2688 maybe_uppercase(p, name);
2693 return error(p,
"System error");
2697 require(expect_dtd_whitespace(p,
2698 "after element name in attlist declaration"));
2700 while(!looking_at(p,
">"))
2702 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2703 require(parse_name(p,
"for attribute in attlist declaration"));
2705 maybe_uppercase(p, name);
2707 require(expect_dtd_whitespace(p,
"after name in attlist declaration"));
2709 if(looking_at(p,
"CDATA"))
2711 else if(looking_at(p,
"IDREFS"))
2713 else if(looking_at(p,
"IDREF"))
2715 else if(looking_at(p,
"ID"))
2717 else if(looking_at(p,
"ENTITIES"))
2719 else if(looking_at(p,
"ENTITY"))
2721 else if(looking_at(p,
"NMTOKENS"))
2723 else if(looking_at(p,
"NMTOKEN"))
2725 else if(looking_at(p,
"NOTATION"))
2732 if(expect_dtd_whitespace(p,
"after attribute type") < 0) {
2741 "or keyword for type in attlist declaration"));
2747 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2750 "for notation value in attlist declaration"));
2753 "for enumerated value in attlist declaration"));
2754 maybe_uppercase_name(p);
2755 ExpandBuf(p->pbuf, p->pbufnext + p->namelen + 1);
2756 memcpy(p->pbuf+p->pbufnext,
2758 p->namelen *
sizeof(Char));
2759 p->pbuf[p->pbufnext + p->namelen] = 0;
2760 p->pbufnext += (p->namelen + 1);
2762 if (skip_dtd_whitespace(p, p->external_pe_depth > 0) < 0) {
2767 while(looking_at(p,
"|"));
2770 "at end of enumerated value list in attlist declaration"));
2771 if(expect_dtd_whitespace(p,
"after enumerated value list " 2772 "in attlist declaration") < 0) {
2777 allowed_values =
Malloc((nvalues+1)*
sizeof(Char *));
2779 return error(p,
"System error");
2780 for(i=0, t=p->pbuf; i<nvalues; i++)
2782 allowed_values[i] = t;
2786 allowed_values[nvalues] = 0;
2793 if(looking_at(p,
"#REQUIRED"))
2795 else if(looking_at(p,
"#IMPLIED"))
2797 else if(looking_at(p,
"#FIXED"))
2800 if (expect_dtd_whitespace(p,
"after #FIXED") <0) {
2801 Free(allowed_values);
2812 "for default value in attlist declaration",
2815 default_value = p->pbuf;
2818 maybe_uppercase(p, default_value);
2823 if (skip_dtd_whitespace(p, p->external_pe_depth > 0) < 0 ) {
2824 Free(allowed_values);
2832 warn(p,
"Ignoring redeclaration of attribute %S", name);
2835 Free(allowed_values[0]);
2836 Free(allowed_values);
2839 Free(default_value);
2843 default_type, default_value))
2844 return error(p,
"System error");
2855 static int parse_external_id(Parser p,
int required,
2859 InputSource s = p->source;
2866 if(looking_at(p,
"SYSTEM"))
2871 c =
get(s);
unget(s);
2872 if(c !=
'"' && c !=
'\'')
2876 require(expect_dtd_whitespace(p,
"after SYSTEM"));
2880 return error(p,
"System error");
2882 else if(looking_at(p,
"PUBLIC"))
2887 c =
get(s);
unget(s);
2888 if(c !=
'"' && c !=
'\'')
2892 require(expect_dtd_whitespace(p,
"after PUBLIC"));
2896 for(cp=p->pbuf; *cp; cp++)
2897 if(!is_ascii_alpha(*cp) && !is_ascii_digit(*cp) &&
2898 strchr8(
"-'()+,./:=?;!*#@$_% \r\n", *cp) == 0)
2899 return error(p,
"Illegal character %s in public id",
2903 return error(p,
"System error");
2908 c =
get(s);
unget(s);
2909 if(c !=
'"' && c !=
'\'')
2913 require(expect_dtd_whitespace(p,
"after public id"));
2917 return error(p,
"System error");
2920 return error(p,
"Missing or invalid external ID");
/*
 * Parse the remainder of an entity declaration, up to and including the
 * closing > character.
 *
 * NOTE(review): this listing omits lines (braces, declarations, several
 * statements); the comments describe only what is visible.  How the ent,
 * line and chpos arguments are used cannot be seen from here.
 */
2927 static int parse_entity_decl(Parser p, Entity ent,
int line,
int chpos)
/* pe is non-zero when a percent sign is present; the messages further down
   treat that case as a parameter entity. */
2933 pe = looking_at(p,
"%");
2936 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2937 require(parse_name(p,
"for name in entity declaration"));
2939 if (expect_dtd_whitespace(p,
"after name in entity declaration") <0) {
/* A quote character starts a literal entity value. */
2944 if(looking_at(p,
"'") || looking_at(p,
"\""))
2949 if(parse_string(p,
"for value in entity declaration",
LT_entity) <0) {
2957 return error(p,
"System error");
/* Otherwise the entity is declared through an external id. */
2961 char8 *publicid, *systemid;
2962 NotationDefinition notation = 0;
2964 if (parse_external_id(p, 1, &publicid, &systemid, 1, 1) < 0) {
/* The whitespace result is kept in t.  NOTE(review): presumably t is what
   the missing-whitespace check before NDATA tests; that test is not
   visible here. */
2969 if ((t = skip_dtd_whitespace(p, p->external_pe_depth > 0)) < 0) {
/* Optional NDATA clause naming a notation. */
2973 if(looking_at(p,
"NDATA"))
2976 return error(p,
"Whitespace missing before NDATA");
2978 return error(p,
"NDATA not allowed for parameter entity");
2979 if (expect_dtd_whitespace(p,
"after NDATA") <0) {
2984 require(parse_name(p,
"for notation name in entity declaration"));
2985 maybe_uppercase_name(p);
2992 return error(p,
"System error");
2997 return error(p,
"System error");
/* Both forms end with optional whitespace and the closing character. */
3002 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3003 require(expect(p,
'>',
"at end of entity declaration"));
/* An entity of this name already exists and its parent is not
   xml_builtin_entity: warn about the redefinition. */
3005 if((old =
FindEntity(p->dtd, e->name, pe)) &&
3006 old->parent != xml_builtin_entity)
3009 warn(p,
"Ignoring redefinition of%s entity %S",
3010 pe ?
" parameter" :
"", e->name);
3014 return error(p,
"System error");
/*
 * Parse the remainder of a notation declaration: name, whitespace, external
 * id, optional whitespace and the closing > character.
 *
 * NOTE(review): this listing omits lines (braces, declarations, the bodies
 * of the error branches, the lookup that detects a redefinition); the
 * comments describe only what is visible.
 */
3022 static int parse_notation_decl(Parser p)
3025 char8 *publicid, *systemid;
3026 NotationDefinition def;
3028 require(parse_name(p,
"for name in notation declaration"));
3030 maybe_uppercase(p, name);
3032 if (expect_dtd_whitespace(p,
"after name in notation declaration") < 0) {
/* Same helper as for entity declarations, but with 0 as the last argument. */
3037 if (parse_external_id(p, 1, &publicid, &systemid, 1, 0) < 0) {
3042 if (skip_dtd_whitespace(p, p->external_pe_depth > 0) < 0) {
3047 if (expect(p,
'>',
"at end of notation declaration") < 0) {
/* Redefinition: warn and release the identifier strings, which are not
   kept in this case. */
3059 warn(p,
"Ignoring redefinition of notation %S", name);
3060 if(publicid)
Free(publicid);
3061 if(systemid)
Free(systemid);
3067 return error(p,
"System error");
/*
 * Parse a conditional section, selected by the keyword INCLUDE or IGNORE.
 *
 * NOTE(review): this listing omits lines (braces, declarations, the case
 * labels of both switch statements); the comments describe only what is
 * visible.
 */
3075 static int parse_conditional(Parser p)
/* A depth of zero is reported as being in the internal subset, where
   conditional sections are rejected. */
3079 if(p->external_pe_depth == 0)
3080 return error(p,
"Conditional section not allowed in internal subset");
3082 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
/* INCLUDE: the declarations inside the section are parsed normally. */
3083 if(looking_at(p,
"INCLUDE"))
3085 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3086 require(expect(p,
'[',
"at start of conditional section"));
3087 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
/* Parse markup declarations until a closing bracket is seen. */
3088 while(!looking_at(p,
"]"))
3090 switch(parse_markupdecl(p))
3093 return error(p,
"EOF in conditional section");
3097 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
/* The loop consumed one bracket; the section must finish with two more
   characters, a bracket and a greater-than sign. */
3100 if(!looking_at(p,
"]>"))
3101 return error(p,
"]> required after ] in conditional section");
/* IGNORE: the content is scanned character by character. */
3103 else if(looking_at(p,
"IGNORE"))
3107 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3108 require(expect(p,
'[',
"at start of conditional section"));
3112 switch(
get(p->source))
/* NOTE(review): presumably the end-of-entity case, which checks for a
   parent source; the case label is not visible here. */
3115 if(p->source->parent)
3118 return error(p,
"EOE in ignored conditional section");
/* NOTE(review): these two tests look like the open and close markers of
   nested sections, presumably tracked with a depth counter; the
   surrounding statements are not visible here. */
3121 if(looking_at(p,
"!["))
3125 if(looking_at(p,
"]>"))
/* Neither keyword was present. */
3131 return error(p,
"INCLUDE or IGNORE required in conditional section");
/*
 * NOTE(review): only the signature survives in this listing; the body is
 * not visible.  Presumably upper-cases the string s in place when the
 * parser is configured to ignore case -- confirm against the full source.
 */
3136 static void maybe_uppercase(Parser p, Char *s)
/*
 * Upper-case the parser name buffer in place.
 *
 * NOTE(review): the lines between the signature and the loop are missing
 * from this listing, so whether the loop is guarded by a parser flag
 * cannot be seen from here.
 */
3146 static void maybe_uppercase_name(Parser p)
/* Replace each of the first p->namelen characters of p->name with its
   Toupper value. */
3151 for(i=0; i<p->namelen; i++)
3152 p->name[i] =
Toupper(p->name[i]);
/*
 * NOTE(review): only the signature survives in this listing; the body is
 * not visible.  Presumably compares the 8-bit strings a and b, ignoring
 * case when the parser is configured that way -- confirm against the full
 * source.
 */
3155 static int str_maybecase_cmp8(Parser p,
const char8 *a,
const char8 *b)
/*
 * Return 1 if c is an unaccented Latin letter, a-z or A-Z, and 0 otherwise.
 *
 * The test is a pair of plain range comparisons and calls no ctype
 * function, so the result does not depend on the current locale and any
 * int value (negative, or above 127) is a valid argument.
 */
static int is_ascii_alpha(int c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
/*
 * Return 1 if c is a decimal digit, 0-9, and 0 otherwise.
 *
 * The test is a plain range comparison and calls no ctype function, so the
 * result does not depend on the current locale and any int value
 * (negative, or above 127) is a valid argument.
 */
static int is_ascii_digit(int c)
{
    return c >= '0' && c <= '9';
}
/*
 * Record an error message on the given bit.
 *
 * NOTE(review): the statements that format the text into the buffer are
 * missing from this listing; only the buffer declaration and the final
 * assignment are visible.
 */
3173 static void verror(XBit bit,
const char8 *format, va_list args)
/* One buffer with static storage is shared by every call. */
3176 static char8 message[400];
/* The bit points at the shared buffer rather than owning a copy, so a
   later call to verror overwrites the text this pointer refers to. */
3183 bit->error_message = message;
/*
 * Report a parse error using a printf-style format and variable arguments.
 * The message is recorded on the bit embedded in the parser, p->xbit.
 *
 * NOTE(review): the end of the function, including its return statement,
 * is missing from this listing.  Callers in this file return its result
 * directly from functions whose other failure checks test for a negative
 * value.
 */
3186 static int error(Parser p,
const char8 *format, ...)
3190 va_start(args, format);
3191 verror(&p->xbit, format, args);
/*
 * Report a warning using a printf-style format and variable arguments.
 *
 * NOTE(review): some lines are missing from this listing (declarations,
 * the statements between the verror call and the callback test).
 */
3198 static void warn(Parser p,
const char8 *format, ...)
/* The warning goes into a function-local static bit, not into p->xbit, so
   the error state of the parser is left alone. */
3201 static struct xbit bit;
3203 va_start(args, format);
3204 verror(&bit, format, args);
/* Deliver the warning through the callback when one is installed, passing
   the callback argument stored on the parser. */
3208 if(p->warning_callback)
3209 p->warning_callback(&bit, p->callback_arg);
XML_API AttributeDefinition DefineAttributeN(ElementDefinition element, const Char *name, int namelen, AttributeType type, Char **allowed_values, DefaultType default_type, const Char *default_value)
#define ExpandBuf(buf, sz)
#define DefineNotation(dtd, name, pub, sys)
XML_API void FreeEntity(Entity e)
void FreeParser(Parser p)
STD_API const char8 * CharacterEncodingName[CE_enum_count]
InputSource EntityOpenerProc(Entity e, void *arg)
void ParserSetWarningCallback(Parser p, CallbackProc cb)
#define FindNotation(dtd, name)
void ParserSetCallbackArg(Parser p, void *arg)
#define is_xml_namechar(c)
void ParserPerror(Parser p, XBit bit)
STD_API int strcasecmp8(const char8 *, const char8 *)
bool save(Lattice &lattice, EST_String filename)
#define NewInternalEntity(name, test, parent, l, l1, mat)
STD_API int Vsprintf(void *buf, CharacterEncoding enc, const char *format, va_list args)
#define FindElement(dtd, name)
void CallbackProc(XBit bit, void *arg)
XML_API const char8 * EntityDescription(Entity e)
EST_Track error(EST_Track &ref, EST_Track &test, int relax=0)
enum attribute_type AttributeType
XML_API Entity DefineEntity(Dtd dtd, Entity entity, int pe)
#define NewExternalEntity(name, pub, sys, nnot, parent)
#define FindEntity(dtd, name, pe)
XML_API ElementDefinition DefineElementN(Dtd dtd, const Char *name, int namelen, ContentType type, Char *content)
STD_API int Toupper(int c)
void ParserSetFlag(Parser p, ParserFlag flag, int value)
enum content_type ContentType
void ParserSetDtdCallback(Parser p, CallbackProc cb)
#define DefineAttribute(element, name, type, all, dt, dv)
#define is_xml_whitespace(c)
STD_API void init_charset(void)
#define TentativelyDefineElement(dtd, name)
AttributeDefinition NextAttributeDefinition(ElementDefinition element, AttributeDefinition previous)
AttributeDefinition definition
enum default_type DefaultType
void FreeContentParticle(ContentParticle cp)
XML_API NotationDefinition FindNotationN(Dtd dtd, const Char *name, int namelen)
const char8 * XBitTypeName[XBIT_enum_count]
XML_API Entity FindEntityN(Dtd dtd, const Char *name, int namelen, int pe)
STD_API void init_stdio16(void)
XBit ParseDtd(Parser p, Entity e)
STD_API int EncodingsCompatible(CharacterEncoding enc1, CharacterEncoding enc2, CharacterEncoding *enc3)
#define DefineElement(dtd, name, type, content)
void FreeXTree(XBit tree)
enum character_encoding CharacterEncoding
XML_API ElementDefinition RedefineElement(ElementDefinition e, ContentType type, Char *content)
enum parser_flag ParserFlag
#define is_xml_namestart(c)
Entity ParserRootEntity(Parser p)
#define FindAttribute(element, name)
STD_API int Fclose(FILE16 *file)
XML_API ElementDefinition FindElementN(Dtd dtd, const Char *name, int namelen)
STD_API int EncodingIsAsciiSuperset(CharacterEncoding enc)
STD_API int Fprintf(FILE16 *file, const char *format,...)
void ParserSetEntityOpener(Parser p, EntityOpenerProc opener)
InputSource ParserRootSource(Parser p)
STD_API char8 * strdup8(const char8 *s)
STD_API CharacterEncoding FindEncoding(char8 *name)
XML_API NotationDefinition RedefineNotation(NotationDefinition n, const char8 *publicid, const char8 *systemid)
void * Realloc(void *mem, int bytes)
EST_Item * parent(const EST_Item *n)
return parent of n
XML_API AttributeDefinition FindAttributeN(ElementDefinition element, const Char *name, int namelen)
#define ParserGetFlag(p, flag)
XML_API NotationDefinition TentativelyDefineNotationN(Dtd dtd, const Char *name, int namelen)
int ParserPush(Parser p, InputSource source)
STD_API FILE16 * MakeFILE16FromString(void *buf, long size, const char *type)
STD_API void init_ctype16(void)
XML_API Entity NewInternalEntityN(const Char *name, int namelen, const Char *text, Entity parent, int line_offset, int line1_char_offset, int matches_parent_text)