Edinburgh Speech Tools  2.1-release
xmlparser.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Copyright (c) 1997-98 Richard Tobin, Language Technology Group, HCRC, */
4 /* University of Edinburgh. */
5 /* */
6 /* THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, */
7 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
8 /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
9 /* IN NO EVENT SHALL THE AUTHOR OR THE UNIVERSITY OF EDINBURGH BE LIABLE */
10 /* FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF */
11 /* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION */
12 /* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
13 /* */
14 /*************************************************************************/
15 /* $Id: xmlparser.c,v 1.3 2004/05/04 00:00:17 awb Exp $ */
16 
17 #ifndef lint
18 static char vcid[] = "$Id: xmlparser.c,v 1.3 2004/05/04 00:00:17 awb Exp $";
19 #endif /* lint */
20 
21 /*
22  * XML (and nSGML) parser.
23  * Author: Richard Tobin.
24  */
25 
26 #include <stdarg.h>
27 #include <stdlib.h>
28 
29 #ifdef FOR_LT
30 
31 #include "lt-memory.h"
32 #include "nsllib.h"
33 
34 #define Malloc salloc
35 #define Realloc srealloc
36 #define Free sfree
37 
38 #else
39 
40 #include "system.h"
41 
42 #endif
43 
44 #include "charset.h"
45 #include "string16.h"
46 #include "ctype16.h"
47 #include "dtd.h"
48 #include "input.h"
49 #include "stdio16.h"
50 #include "xmlparser.h"
51 
52 static int transcribe(Parser p, int back, int count);
53 static void pop_while_at_eoe(Parser p);
54 static void maybe_uppercase(Parser p, Char *s);
55 static void maybe_uppercase_name(Parser p);
56 static int str_maybecase_cmp8(Parser p, const char8 *a, const char8 *b);
57 static int is_ascii_alpha(int c);
58 static int is_ascii_digit(int c);
59 static int parse_external_id(Parser p, int required,
60  char8 **publicid, char8 **systemid,
61  int preq, int sreq);
62 static int parse_conditional(Parser p);
63 static int parse_notation_decl(Parser p);
64 static int parse_entity_decl(Parser p, Entity ent, int line, int chpos);
65 static int parse_attlist_decl(Parser p);
66 static int parse_element_decl(Parser p);
67 static ContentParticle parse_cp(Parser p);
68 static ContentParticle parse_choice_or_seq(Parser p);
69 static ContentParticle parse_choice_or_seq_1(Parser p, int nchildren,char sep);
70 static int check_content_decl(Parser p, ContentParticle cp);
71 static int check_content_decl_1(Parser p, ContentParticle cp);
72 static Char *stringify_cp(ContentParticle cp);
73 static void print_cp(ContentParticle cp, FILE16 *f);
74 static int size_cp(ContentParticle cp);
75 void FreeContentParticle(ContentParticle cp);
76 static int parse_reference(Parser p, int pe, int expand, int allow_external);
77 static int parse_character_reference(Parser p, int expand);
78 static const char8 *escape(int c);
79 static int parse_name(Parser p, const char8 *where);
80 static int parse_nmtoken(Parser p, const char8 *where);
81 static int looking_at(Parser p, const char8 *string);
82 static void clear_xbit(XBit xbit);
83 static int expect(Parser p, int expected, const char8 *where);
84 static int expect_dtd_whitespace(Parser p, const char8 *where);
85 static void skip_whitespace(InputSource s);
86 static int skip_dtd_whitespace(Parser p, int allow_pe);
87 static int parse_cdata(Parser p);
88 static int process_nsl_decl(Parser p);
89 static int process_xml_decl(Parser p);
90 static int parse_dtd(Parser p);
91 static int read_markupdecls(Parser p);
92 static int error(Parser p, const char8 *format, ...);
93 static void warn(Parser p, const char8 *format, ...);
94 static void verror(XBit bit, const char8 *format, va_list args);
96 static int parse_string(Parser p, const char8 *where, enum literal_type type);
97 static int parse_pi(Parser p);
98 static int parse_comment(Parser p, int skip);
99 static int parse_pcdata(Parser p);
100 static int parse_starttag(Parser p);
101 static int parse_attribute(Parser p);
102 static int parse_endtag(Parser p);
103 static int parse_markup(Parser p);
104 static int parse(Parser p);
105 static int parse_markupdecl(Parser p);
106 
/*
 * Error-propagation helpers.  Each expands to an if/else statement (not a
 * do-while) so that it can execute a `return' in the calling function.
 * Every macro argument is parenthesized so that compound expressions
 * (conditional, assignment, comma) expand as the caller intended.
 */

/* Return -1 from the calling function if x is negative. */
#define require(x) if((x) >= 0) {} else return -1
/* Return 0 from the calling function if x is negative. */
#define require0(x) if((x) >= 0) {} else return 0

/*
 * Give up ownership of a buffer: clears the pointer and its companion size
 * field, whose name is formed by appending `size' to the last token of buf
 * (e.g. p->pbuf / p->pbufsize).
 */
#define Consume(buf) ((buf) = 0, buf##size = 0)

/*
 * Make sure buf can hold sz characters plus a terminator.  Uses `p' from
 * the calling function.  If Realloc fails the buffer pointer is null, so the
 * size field is reset to 0 to keep the pair consistent before the calling
 * function returns error().
 */
#define ExpandBuf(buf, sz) \
    if(buf##size >= (sz)+1) {} else if(((buf) = Realloc((buf), (buf##size = (sz) + 1) * sizeof(Char)))) {} else return (buf##size = 0, error(p, "System error"))

/*
 * Store in n a freshly allocated, null-terminated copy of the name most
 * recently recorded in p->name / p->namelen.  On allocation failure the
 * calling function returns error().
 */
#define CopyName(n) if(((n) = Malloc((p->namelen + 1)*sizeof(Char)))) {memcpy((n), p->name, p->namelen * sizeof(Char)); (n)[p->namelen] = 0;} else return error(p, "System error");

/* As CopyName, but the calling function returns 0 on allocation failure. */
#define CopyName0(n) if(((n) = Malloc((p->namelen + 1)*sizeof(Char)))) {memcpy((n), p->name, p->namelen * sizeof(Char)); (n)[p->namelen] = 0;} else {error(p, "System error"); return 0;}
117 
/* NOTE(review): the declaration line that opens this initializer (listing
   line 118) is missing from this copy.  The entries look like printable
   names for the XBIT_* bit types, one per type -- confirm the array name,
   element type and ordering against xmlparser.h. */
119  "dtd",
120  "start",
121  "empty",
122  "end",
123  "eof",
124  "pcdata",
125  "pi",
126  "comment",
127  "cdsect",
128  "xml",
129  "error",
130  "warning",
131  "none"
132 };
133 
134 static Entity xml_builtin_entity;
135 static Entity xml_predefined_entities;
136 
137 int ParserInit(void)
138 {
139  static int initialised = 0;
140  Entity e, f;
141  int i;
142  static const Char lt[] = {'l','t',0}, ltval[] = {'&','#','6','0',';',0};
143  static const Char gt[] = {'g','t',0}, gtval[] = {'&','#','6','2',';',0};
144  static const Char amp[] = {'a','m','p',0},
145  ampval[] = {'&','#','3','8',';',0};
146  static const Char apos[] = {'a','p','o','s',0}, aposval[] = {'\'',0};
147  static const Char quot[] = {'q','u','o','t',0}, quotval[] = {'"',0};
148  static const Char *builtins[5][2] = {
149  {lt, ltval}, {gt, gtval}, {amp, ampval},
150  {apos, aposval}, {quot, quotval}
151  };
152  (void)vcid;
153 
154  if(initialised)
155  return 0;
156  initialised = 1;
157 
158  init_charset();
159  init_ctype16();
160  init_stdio16();
161 
162  for(i=0, f=0; i<5; i++, f=e)
163  {
164  e = NewInternalEntity(builtins[i][0], builtins[i][1],
165  xml_builtin_entity, 0, 0, 0);
166  if(!e)
167  return -1;
168  e->next = f;
169  }
170 
171  xml_predefined_entities = e;
172 
173  return 0;
174 }
175 
176 static void skip_whitespace(InputSource s)
177 {
178  int c;
179 
180  while((c = get(s)) != XEOE && is_xml_whitespace(c))
181  ;
182  unget(s);
183 }
184 
185 /*
186  * Skip whitespace and (optionally) the start and end of PEs. Return 1 if
187  * there actually *was* some whitespace or a PE start/end, -1 if
188  * an error occurred, 0 otherwise.
189  */
190 
191 static int skip_dtd_whitespace(Parser p, int allow_pe)
192 {
193  int c;
194  int got_some = 0;
195  InputSource s = p->source;
196 
197  while(1)
198  {
199  c = get(s);
200 
201  if(c == XEOE)
202  {
203  got_some = 1;
204  if(s->parent)
205  {
206  if(!allow_pe)
207  return error(p,
208  "PE end not allowed here in internal subset");
209  if(s->entity->type == ET_external)
210  p->external_pe_depth--;
211  ParserPop(p);
212  s = p->source;
213  }
214  else
215  {
216  unget(s); /* leave the final EOE waiting to be read */
217  return got_some;
218  }
219  }
220  else if(is_xml_whitespace(c))
221  {
222  got_some = 1;
223  }
224  else if(c == '%')
225  {
226  /* this complication is needed for <!ENTITY % ...
227  otherwise we could just assume it was a PE reference. */
228 
229  c = get(s); unget(s);
230  if(c != XEOE && is_xml_namestart(c))
231  {
232  if(!allow_pe)
233  {
234  unget(s); /* For error position */
235  return error(p,
236  "PE ref not allowed here in internal subset");
237  }
238  require(parse_reference(p, 1, 1, 1));
239  s = p->source;
240  if(s->entity->type == ET_external)
241  p->external_pe_depth++;
242  got_some = 1;
243  }
244  else
245  {
246  unget(s);
247  return got_some;
248  }
249  }
250  else
251  {
252  unget(s);
253  return got_some;
254  }
255  }
256 }
257 
258 static int expect(Parser p, int expected, const char8 *where)
259 {
260  int c;
261  InputSource s = p->source;
262 
263  c = get(s);
264  if(c != expected)
265  {
266  unget(s); /* For error position */
267  return error(p, "Expected %s %s, but got %s",
268  escape(expected), where, escape(c));
269  }
270 
271  return 0;
272 }
273 
274 /*
275  * Expects whitespace or the start or end of a PE.
276  */
277 
278 static int expect_dtd_whitespace(Parser p, const char8 *where)
279 {
280  int r = skip_dtd_whitespace(p, p->external_pe_depth > 0);
281 
282  if(r < 0)
283  return -1;
284 
285  if(r == 0)
286  return error(p, "Expected whitespace %s", where);
287 
288  return 0;
289 }
290 
291 static void clear_xbit(XBit xbit)
292 {
293  xbit->type = XBIT_none;
294  xbit->s1 = xbit->s2 = 0;
295  xbit->S1 = xbit->S2 = 0;
296  xbit->attributes = 0;
297  xbit->element_definition = 0;
298 }
299 
300 void FreeXBit(XBit xbit)
301 {
302  Attribute a, b;
303 
304  if(xbit->S1) Free(xbit->S1);
305  if(xbit->S2) Free(xbit->S2);
306  if(xbit->type != XBIT_error && xbit->type != XBIT_warning && xbit->s1)
307  Free(xbit->s1);
308  if(xbit->s2) Free(xbit->s2);
309  for(a = xbit->attributes; a; a = b)
310  {
311  b = a->next;
312  if(a->value) Free(a->value);
313  Free(a);
314  }
315  clear_xbit(xbit);
316 }
317 
318 /*
319  * Returns 1 if the input matches string (and consume the input).
320  * Otherwise returns 0 and leaves the input stream where it was.
321  * Case-sensitivity depends on the CaseInsensitive flag.
322  * A space character at end of string matches any (non-zero) amount of
323  * whitespace; space are treated literally elsewhere.
324  * Never reads beyond an end-of-line, except to consume
325  * extra whitespace when the last character of string is a space.
326  * Never reads beyond end-of-entity.
327  */
328 
329 static int looking_at(Parser p, const char8 *string)
330 {
331  InputSource s = p->source;
332  int c, d;
333  int save = s->next;
334 
335  for(c = *string++; c; c = *string++)
336  {
337  if(at_eol(s))
338  goto fail; /* We would go over a line end */
339 
340  d = get(s);
341 
342  if(c == ' ' && *string == 0)
343  {
344  if(d == XEOE || !is_xml_whitespace(d))
345  goto fail;
346  skip_whitespace(s);
347  }
348  else
349  if((ParserGetFlag(p, CaseInsensitive) &&
350  Toupper(d) != Toupper(c)) ||
351  (!ParserGetFlag(p, CaseInsensitive) && d != c))
352  goto fail;
353  }
354 
355  return 1;
356 
357 fail:
358  s->next = save;
359  return 0;
360 }
361 
362 static int parse_name(Parser p, const char8 *where)
363 {
364  InputSource s = p->source;
365  int c, i;
366 
367  c = get(s);
368  if(c == XEOE || !is_xml_namestart(c))
369  {
370  unget(s); /* For error position */
371  error(p, "Expected name, but got %s %s", escape(c), where);
372  return -1;
373  }
374  i = 1;
375 
376  while(c = get(s), (c != XEOE && is_xml_namechar(c)))
377  i++;
378  unget(s);
379 
380  p->name = s->line + s->next - i;
381  p->namelen = i;
382 
383  return 0;
384 }
385 
386 static int parse_nmtoken(Parser p, const char8 *where)
387 {
388  InputSource s = p->source;
389  int c, i=0;
390 
391  while(c = get(s), (c !=XEOE && is_xml_namechar(c)))
392  i++;
393  unget(s);
394 
395  if(i == 0)
396  return error(p, "Expected nmtoken value, but got %s %s",
397  escape(c), where);
398 
399  p->name = s->line + s->next - i;
400  p->namelen = i;
401 
402  return 0;
403 }
404 
405 /* Escape a character for printing n an error message.
406  NB returns 5 static storage buffers in rotation. */
407 
408 static const char8 *escape(int c)
409 {
410  static char8 buf[5][15];
411  static int bufnum=-1;
412 
413 #if CHAR_SIZE == 8
414  if(c != XEOE)
415  c &= 0xff;
416 #endif
417 
418  bufnum = (bufnum + 1) % 5;
419 
420  if(c == XEOE)
421  return "<EOE>";
422  else if(c >= 33 && c <= 126)
423  sprintf(buf[bufnum], "%c", c);
424  else if(c == ' ')
425  sprintf(buf[bufnum], "<space>");
426  else
427  sprintf(buf[bufnum], "<0x%x>", c);
428 
429  return buf[bufnum];
430 }
431 
/*
 * Allocate a new Parser and give every field its starting value.
 * Returns 0 if ParserInit() fails or the Parser itself cannot be allocated.
 * NOTE(review): the embedded listing numbers jump 467 -> 473 -> 481 -> 484,
 * so some statements around the ParserSetFlag calls are missing from this
 * copy.
 * NOTE(review): the result of NewDtd() is stored without being checked.
 */
432 Parser NewParser(void)
433 {
434  Parser p;
435 
436  if(ParserInit() == -1)
437  return 0;
438 
439  p = Malloc(sizeof(*p));
440  if(!p)
441  return 0;
442  p->state = PS_prolog1;
443  p->document_entity = 0; /* Set at first ParserPush */
444  p->have_dtd = 0;
445  p->standalone = SDD_unspecified;
446  p->flags = 0;
447  p->source = 0;
448  clear_xbit(&p->xbit);
449 #ifndef FOR_LT
450  p->xbit.nchildren = 0; /* These three should never be changed */
451  p->xbit.children = 0;
452  p->xbit.parent = 0;
453 #endif
 /* The scratch buffer starts out unallocated and empty. */
454  p->pbufsize = p->pbufnext = 0;
455  p->pbuf = 0;
456  p->peeked = 0;
457  p->dtd = NewDtd();
458  p->dtd_callback = p->warning_callback = 0;
459  p->entity_opener = 0;
460  p->callback_arg = 0;
461  p->external_pe_depth = 0;
462 
 /* The open-element stack is allocated on demand. */
463  p->element_stack = 0;
464  p->element_stack_alloc = 0;
465  p->element_depth = 0;
466 
 /* Flags switched on by default. */
467  ParserSetFlag(p, XMLPiEnd, 1);
473  ParserSetFlag(p, XMLLessThan, 1);
481  ParserSetFlag(p, TrustSDD, 1);
484 
485  return p;
486 }
487 
488 void FreeParser(Parser p)
489 {
490  while (p->source)
491  ParserPop(p); /* Will close file */
492 
493  Free(p->pbuf);
494  Free(p->element_stack);
495  Free(p);
496 }
497 
498 InputSource ParserRootSource(Parser p)
499 {
500  InputSource s;
501 
502  for(s=p->source; s && s->parent; s = s->parent)
503  ;
504 
505  return s;
506 }
507 
508 Entity ParserRootEntity(Parser p)
509 {
510  return ParserRootSource(p)->entity;
511 }
512 
513 void ParserSetCallbackArg(Parser p, void *arg)
514 {
515  p->callback_arg = arg;
516 }
517 
/* Stores cb in p->dtd_callback.
   NOTE(review): the signature line of this function (listing line 518) is
   missing from this copy; the function name is inferred from the field. */
519 {
520  p->dtd_callback = cb;
521 }
522 
/* Stores cb in p->warning_callback.
   NOTE(review): the signature line of this function (listing line 523) is
   missing from this copy; the function name is inferred from the field. */
524 {
525  p->warning_callback = cb;
526 }
527 
/* Stores opener in p->entity_opener.
   NOTE(review): the signature line of this function (listing line 528) is
   missing from this copy; the function name is inferred from the field. */
529 {
530  p->entity_opener = opener;
531 }
532 
533 #ifndef FOR_LT
534 
535 XBit ReadXTree(Parser p)
536 {
537  XBit bit, tree, child;
538  XBit *children;
539 
540  bit = ReadXBit(p);
541 
542  switch(bit->type)
543  {
544  case XBIT_error:
545  return bit;
546 
547  case XBIT_start:
548  if(!(tree = Malloc(sizeof(*tree))))
549  {
550  error(p, "System error");
551  return &p->xbit;
552  }
553  *tree = *bit;
554  while(1)
555  {
556  child = ReadXTree(p);
557  switch(child->type)
558  {
559  case XBIT_error:
560  FreeXTree(tree);
561  return child;
562 
563  case XBIT_eof:
564  FreeXTree(tree);
565  {
566  error(p, "EOF in element");
567  return &p->xbit;
568  }
569 
570  case XBIT_end:
571  if(child->element_definition != tree->element_definition)
572  {
573  const Char *name1 = tree->element_definition->name,
574  *name2 = child->element_definition->name;
575  FreeXTree(tree);
576  FreeXTree(child);
577  error(p, "Mismatched end tag: expected </%S>, got </%S>",
578  name1, name2);
579  return &p->xbit;
580  }
581  FreeXTree(child);
582  return tree;
583 
584  default:
585  children = Realloc(tree->children,
586  (tree->nchildren + 1) * sizeof(XBit));
587  if(!children)
588  {
589  FreeXTree(tree);
590  FreeXTree(child);
591  error(p, "System error");
592  return &p->xbit;
593  }
594  child->parent = tree;
595  children[tree->nchildren] = child;
596  tree->nchildren++;
597  tree->children = children;
598  break;
599  }
600  }
601 
602  default:
603  if(!(tree = Malloc(sizeof(*tree))))
604  {
605  error(p, "System error");
606  return &p->xbit;
607  }
608  *tree = *bit;
609  return tree;
610  }
611 }
612 
613 void FreeXTree(XBit tree)
614 {
615  int i;
616 
617  for(i=0; i<tree->nchildren; i++)
618  FreeXTree(tree->children[i]);
619 
620  Free(tree->children);
621 
622  FreeXBit(tree);
623 
624  if(tree->type == XBIT_error)
625  /* error "trees" are always in the Parser structure, not malloced */
626  return;
627 
628  Free(tree);
629 }
630 
631 #endif /* (not) FOR_LT */
632 
633 XBit ReadXBit(Parser p)
634 {
635  if(p->peeked)
636  p->peeked = 0;
637  else
638  parse(p);
639 
640  return &p->xbit;
641 }
642 
643 XBit PeekXBit(Parser p)
644 {
645  if(p->peeked)
646  error(p, "Attempt to peek twice");
647  else
648  {
649  parse(p);
650  p->peeked = 1;
651  }
652 
653  return &p->xbit;
654 }
655 
/*
 * Make `source' the parser's current input source, stacked on top of the
 * previous one.  The first source ever pushed also provides the document
 * entity.  For an internal entity nothing more is done.  For other
 * entities the encoding is checked and a leading <?NSL, <?xml or <?XML
 * declaration is processed.  Returns 0 on success, or the negative result
 * of error() / the declaration processing on failure.
 * NOTE(review): listing lines 671 and 704 are missing from this copy (see
 * the notes at those points).
 */
656 int ParserPush(Parser p, InputSource source)
657 {
658  if(!p->source && !p->document_entity)
659  p->document_entity = source->entity;
660 
661  source->parent = p->source;
662  p->source = source;
663 
664  if(source->entity->type == ET_internal)
665  return 0;
666 
667  /* Look at first few bytes of external entities to guess encoding,
668  then look for an XMLDecl or TextDecl. */
669 
 /* NOTE(review): the statement controlled by this `if' (listing line 671,
    presumably the call that determines the encoding) is missing, so as
    shown the `if' would govern the next statement instead. */
670  if(source->entity->encoding == CE_unknown) /* we might already know */
672 
673 #if CHAR_SIZE == 8
674  if(!EncodingIsAsciiSuperset(source->entity->encoding))
675  return error(p, "Unsupported character encoding %s",
676  CharacterEncodingName[source->entity->encoding]);
677 #else
678  if(source->entity->encoding == CE_unknown)
679  return error(p, "Unknown character encoding");
680 #endif
681 
682  get(source); unget(source); /* To get the first line read */
683 
684  source->entity->ml_decl = ML_unspecified;
685  if(looking_at(p, "<?NSL "))
686  return process_nsl_decl(p);
687  if(looking_at(p, "<?xml "))
688  {
689  require(process_xml_decl(p));
 /* The document entity's declaration must carry a version; any other
    entity's declaration must not carry a standalone attribute. */
690  if(source->entity == p->document_entity &&
691  !source->entity->version_decl)
692  return error(p, "XML declaration in document entity lacked "
693  "version number");
694  if(source->entity != p->document_entity &&
695  source->entity->standalone_decl != SDD_unspecified)
696  return error(p, "Standalone attribute not allowed except in "
697  "document entity");
698  return 0;
699  }
700  else if(!ParserGetFlag(p, XMLStrictWFErrors) && looking_at(p, "<?XML "))
701  {
702  warn(p, "Found <?XML instead of <?xml; switching to case-"
703  "insensitive mode");
 /* NOTE(review): listing line 704 is missing here; given the warning
    text it presumably switched on the CaseInsensitive flag. */
705  return process_xml_decl(p);
706  }
707  else
708  return 0;
709 }
710 
711 void ParserPop(Parser p)
712 {
713  InputSource source;
714 
715  source = p->source;
716  Fclose(source->file16);
717  p->source = source->parent;
718 
719  if(source->entity->type == ET_external)
720  Free(source->line);
721  Free(source);
722 }
723 
724 /* Returns true if the source is at EOE. If so, the EOE will have been read. */
725 
726 static int at_eoe(InputSource s)
727 {
728  if(!at_eol(s))
729  return 0;
730  if(s->seen_eoe || get_with_fill(s) == XEOE)
731  return 1;
732  unget(s);
733  return 0;
734 }
735 
736 /* Pops any sources that are at EOE. Leaves source buffer with at least
737  one character in it (except at EOF, where it leaves the EOE unread). */
738 
739 static void pop_while_at_eoe(Parser p)
740 {
741  while(1)
742  {
743  InputSource s = p->source;
744 
745  if(!at_eoe(s))
746  return;
747  if(!s->parent)
748  {
749  unget(s);
750  return;
751  }
752  ParserPop(p);
753  }
754 }
755 
756 void ParserSetFlag(Parser p, ParserFlag flag, int value)
757 {
758  if(value)
759  p->flags |= (1 << flag);
760  else
761  p->flags &= ~(1 << flag);
762 
763  if(flag == XMLPredefinedEntities)
764  {
765  if(value)
766  p->dtd->predefined_entities = xml_predefined_entities;
767  else
768  p->dtd->predefined_entities = 0;
769  }
770 }
771 
772 void ParserPerror(Parser p, XBit bit)
773 {
774  int linenum, charnum;
775  InputSource s;
776 
777  Fprintf(Stderr, "%s: %s\n",
778  bit->type == XBIT_error ? "Error" : "Warning",
779  bit->error_message);
780 
781 
782  for(s=p->source; s; s=s->parent)
783  {
784  if(s->entity->name)
785  Fprintf(Stderr, " in entity \"%S\"", s->entity->name);
786  else
787  Fprintf(Stderr, " in unnamed entity");
788 
789  switch(SourceLineAndChar(s, &linenum, &charnum))
790  {
791  case 1:
792  Fprintf(Stderr, " at line %d char %d of", linenum+1, charnum+1);
793  break;
794  case 0:
795  Fprintf(Stderr, " defined at line %d char %d of",
796  linenum+1, charnum+1);
797  break;
798  case -1:
799  Fprintf(Stderr, " defined in");
800  break;
801  }
802 
803  Fprintf(Stderr, " %s\n", EntityDescription(s->entity));
804  }
805 }
806 
807 
/*
 * Parse the next bit of the document into p->xbit.  Returns 0 on success
 * and the negative result of error() or a sub-parser on failure.  Once the
 * parser has finished or failed (PS_end / PS_error) every call just yields
 * an XBIT_eof bit.
 * NOTE(review): listing lines 841 and 851 are missing from this copy; see
 * the notes in the '&' case.
 */
808 static int parse(Parser p)
809 {
810  int c;
811  InputSource s;
812 
813  if(p->state == PS_end || p->state == PS_error)
814  {
815  /* After an error or EOF, just keep returning EOF */
816  p->xbit.type = XBIT_eof;
817  return 0;
818  }
819 
820  clear_xbit(&p->xbit);
821 
 /* Whitespace is skipped silently in the prolog and the epilog. */
822  if(p->state <= PS_prolog2 || p->state == PS_epilog)
823  skip_whitespace(p->source);
824 
825 restart:
826  pop_while_at_eoe(p);
827  s = p->source;
 /* Record in the bit where it starts. */
828  SourcePosition(s, &p->xbit.entity, &p->xbit.byte_offset);
829 
830  switch(c = get(s))
831  {
832  case XEOE:
 /* End of input is only legal once the body has been closed. */
833  if(p->state != PS_epilog)
834  return error(p, "Document ends too soon");
835  p->state = PS_end;
836  p->xbit.type = XBIT_eof;
837  return 0;
838  case '<':
839  return parse_markup(p);
840  case '&':
 /* NOTE(review): listing line 841 is missing; it was presumably a
    condition guarding this goto.  As shown the goto is unconditional
    and the rest of this case is unreachable. */
842  goto pcdata;
843  if(p->state <= PS_prolog2)
844  return error(p, "Entity reference not allowed in prolog");
845  if(looking_at(p, "#"))
846  {
847  /* a character reference - go back and parse as pcdata */
848  unget(s);
849  goto pcdata;
850  }
 /* NOTE(review): listing line 851 is missing; it was presumably the
    condition (expanding general entities) guarding this block. */
852  {
853  /* an entity reference - push it and start again */
854  require(parse_reference(p, 0, 1, 1));
855  goto restart;
856  }
857  /* not expanding general entities, so treat as pcdata */
858  goto pcdata;
859  default:
860  pcdata:
 /* Put the first character back; parse_pcdata expects it unread. */
861  unget(s);
862  return parse_pcdata(p);
863  }
864 }
865 
/*
 * Dispatch on the character following '<': comment, DOCTYPE or CDATA
 * section after "<!", end tag after "</", processing instruction after
 * "<?", otherwise a start tag (or, when the XMLLessThan flag is off and no
 * name start follows, character data).  Returns the sub-parser's result.
 * NOTE(review): listing line 878 is missing from this copy.
 */
866 /* Called after reading '<' */
867 
868 static int parse_markup(Parser p)
869 {
870  InputSource s = p->source;
871  int c = get(s);
872 
873  switch(c)
874  {
875  case '!':
876  if(looking_at(p, "--"))
877  {
 /* NOTE(review): listing line 878 is missing; it was presumably the
    condition choosing between returning the comment as a bit (first
    branch) and skipping it and parsing the next bit (else branch). */
879  return parse_comment(p, 0);
880  else
881  {
882  require(parse_comment(p, 1));
883  return parse(p);
884  }
885  }
886  else if(looking_at(p, "DOCTYPE "))
887  return parse_dtd(p);
888  else if(looking_at(p, "[CDATA["))
889  return parse_cdata(p);
890  else
891  return error(p, "Syntax error after <!");
892 
893  case '/':
894  return parse_endtag(p);
895 
896  case '?':
897  return parse_pi(p);
898 
899  default:
 /* Not markup punctuation: give the character back to the sub-parser. */
900  unget(s);
901  if(!ParserGetFlag(p, XMLLessThan) &&
902  (c == XEOE || !is_xml_namestart(c)))
903  {
904  /* In nSGML, recognise < as stago only if followed by namestart */
905 
906  unget(s); /* put back the < */
907  return parse_pcdata(p);
908  }
909  return parse_starttag(p);
910  }
911 }
912 
/*
 * Parse an end tag; called after "</" has been read.  Produces an XBIT_end
 * bit whose element_definition identifies the element.  Returns 0 on
 * success or the negative result of error() / parse_name() / expect().
 * NOTE(review): listing line 922 is missing from this copy.
 */
913 static int parse_endtag(Parser p)
914 {
915  ElementDefinition def;
916  Entity ent;
917 
918  p->xbit.type = XBIT_end;
919  require(parse_name(p, "after </"));
920  maybe_uppercase_name(p);
921 
 /* NOTE(review): listing line 922 is missing; it was presumably the
    condition selecting between checking the tag against the element
    stack (this block) and a plain DTD lookup (the else block). */
923  {
924  if(p->element_depth <= 0)
925  return error(p, "End tag </%.*S> outside of any element",
926  p->namelen, p->name);
927 
 /* Pop the innermost open element and the entity it started in. */
928  ent = p->element_stack[--p->element_depth].entity;
929  def = p->element_stack[p->element_depth].definition;
930 
931  if(p->namelen == def->namelen &&
932  memcmp(p->name, def->name, p->namelen * sizeof(Char)) == 0)
933  p->xbit.element_definition = def;
934  else
935  return error(p, "Mismatched end tag: expected </%S>, got </%.*S>",
936  def->name, p->namelen, p->name);
937 
938  if(ent != p->source->entity)
939  return error(p, "Element ends in different entity from that "
940  "in which it starts");
941 
 /* Closing the outermost element ends the document body. */
942  if(p->element_depth == 0)
943  p->state = PS_epilog;
944  }
945  else
946  {
947  p->xbit.element_definition = FindElementN(p->dtd, p->name, p->namelen);
948  if(!p->xbit.element_definition)
949  return error(p, "End tag for unknown element %.*S",
950  p->namelen, p->name);
951  }
952 
953  skip_whitespace(p->source);
954  return expect(p, '>', "after name in end tag");
955 }
956 
/*
 * Parse a start tag or empty-element tag; called with the element name
 * still unread.  Produces an XBIT_start or XBIT_empty bit carrying the
 * element definition and the attribute list.  An element that is unknown
 * (or only tentatively known) is defined with content ANY unless the
 * ErrorOnUndefinedElements flag makes that an error.  Returns 0 on success
 * or a negative value on error.
 * NOTE(review): listing lines 1027 and 1050 are missing from this copy.
 */
957 static int parse_starttag(Parser p)
958 {
959  int c;
960 
961  if(p->state == PS_epilog && !ParserGetFlag(p, AllowMultipleElements))
962  return error(p, "Document contains multiple elements");
963 
964  p->state = PS_body;
965 
966  require(parse_name(p, "after <"));
967  maybe_uppercase_name(p);
968 
969  p->xbit.element_definition = FindElementN(p->dtd, p->name, p->namelen);
970  if(!p->xbit.element_definition || p->xbit.element_definition->tentative)
971  {
972  if(p->have_dtd && ParserGetFlag(p, ErrorOnUndefinedElements))
973  return error(p, "Start tag for undeclared element %.*S",
974  p->namelen, p->name);
975  if(p->have_dtd && ParserGetFlag(p, WarnOnUndefinedElements))
976  warn(p, "Start tag for undeclared element %.*S; "
977  "declaring it to have content ANY",
978  p->namelen, p->name);
979  if(p->xbit.element_definition)
980  RedefineElement(p->xbit.element_definition, CT_any, 0);
981  else
982  {
983  if(!(p->xbit.element_definition =
984  DefineElementN(p->dtd, p->name, p->namelen, CT_any, 0)))
985  return error(p, "System error");
986  }
987  }
988 
 /* Read attributes until the tag is closed by '>' or, when the
    XMLEmptyTagEnd flag is set, by "/>". */
989  while(1)
990  {
991  InputSource s = p->source;
992 
993  /* We could just do skip_whitespace here, but we will get a
994  better error message if we look a bit closer. */
995 
996  c = get(s);
997  if(c !=XEOE && is_xml_whitespace(c))
998  {
999  skip_whitespace(s);
1000  c = get(s);
1001  }
1002  else if(c != '>' &&
1003  !(ParserGetFlag(p, XMLEmptyTagEnd) && c == '/'))
1004  {
1005  unget(s); /* For error position */
1006  return error(p, "Expected whitespace or tag end in start tag");
1007  }
1008 
1009  if(c == '>')
1010  {
1011  p->xbit.type = XBIT_start;
1012  break;
1013  }
1014 
1015  if((ParserGetFlag(p, XMLEmptyTagEnd)) && c == '/')
1016  {
1017  require(expect(p, '>', "after / in start tag"));
1018  p->xbit.type = XBIT_empty;
1019  break;
1020  }
1021 
1022  unget(s);
1023 
1024  require(parse_attribute(p));
1025  }
1026 
 /* NOTE(review): listing line 1027 is missing; it was presumably the
    condition (end-tag checking enabled) guarding this block, which
    maintains the stack of open elements. */
1028  {
1029  if(p->xbit.type == XBIT_start)
1030  {
 /* Grow the stack when full: 20 entries at first, then doubling. */
1031  if(p->element_depth == p->element_stack_alloc)
1032  {
1033  p->element_stack_alloc =
1034  p->element_stack_alloc == 0 ? 20 :
1035  p->element_stack_alloc * 2;
1036  if(!(p->element_stack =
1037  Realloc(p->element_stack,
1038  (p->element_stack_alloc * sizeof(*p->element_stack)))))
1039  return error(p, "System error");
1040  }
1041  p->element_stack[p->element_depth].definition =
1042  p->xbit.element_definition;
1043  p->element_stack[p->element_depth++].entity = p->source->entity;
1044  }
1045  else
 /* An empty element at the top level is the whole body. */
1046  if(p->element_depth == 0)
1047  p->state = PS_epilog;
1048  }
1049 
 /* NOTE(review): listing line 1050 is missing; it was presumably a
    condition guarding this block, which adds an attribute for every
    declared default value not given explicitly in the tag. */
1051  {
1052  AttributeDefinition d;
1053  Attribute a;
1054 
1055  for(d=NextAttributeDefinition(p->xbit.element_definition, 0);
1056  d;
1057  d=NextAttributeDefinition(p->xbit.element_definition, d))
1058  {
1059  if(!d->default_value)
1060  continue;
1061  for(a=p->xbit.attributes; a; a=a->next)
1062  if(a->definition == d)
1063  break;
1064  if(!a)
1065  {
1066  if(!(a = Malloc(sizeof(*a))))
1067  return error(p, "System error");
1068  a->definition = d;
 /* NOTE(review): if Strdup fails, a is neither freed nor linked
    into the attribute list before returning. */
1069  if(!(a->value = Strdup(d->default_value)))
1070  return error(p, "System error");
1071  a->quoted = 1;
1072  a->next = p->xbit.attributes;
1073  p->xbit.attributes = a;
1074  }
1075  }
1076  }
1077 
1078  return 0;
1079 }
1080 
/*
 * Parse one attribute (name, '=', value) of the start tag being built in
 * p->xbit and put it at the front of the bit's attribute list.  An
 * undeclared attribute is declared as CDATA #IMPLIED unless the
 * ErrorOnUndefinedAttributes flag makes that an error; a repeated attribute
 * is an error.  Returns 0 on success or a negative value on error.
 * NOTE(review): listing line 1137 is missing from this copy.
 */
1081 static int parse_attribute(Parser p)
1082 {
1083  InputSource s = p->source;
1084  AttributeDefinition def;
1085  struct attribute *a;
1086  int c;
1087 
1088  require(parse_name(p, "for attribute"));
1089  maybe_uppercase_name(p);
1090 
1091  def = FindAttributeN(p->xbit.element_definition, p->name, p->namelen);
1092  if(!def)
1093  {
1094  if(p->have_dtd && ParserGetFlag(p, ErrorOnUndefinedAttributes))
1095  return error(p, "Undeclared attribute %.*S for element %S",
1096  p->namelen, p->name, p->xbit.element_definition->name);
1097  if(p->have_dtd && ParserGetFlag(p, WarnOnUndefinedAttributes))
1098  warn(p, "Undeclared attribute %.*S for element %S; "
1099  "declaring it as CDATA #IMPLIED",
1100  p->namelen, p->name, p->xbit.element_definition->name);
1101  if(!(def = DefineAttributeN(p->xbit.element_definition,
1102  p->name, p->namelen,
1103  AT_cdata, 0, DT_implied, 0)))
1104  return error(p, "System error");
1105  }
1106 
1107  for(a = p->xbit.attributes; a; a = a->next)
1108  if(a->definition == def)
1109  return error(p, "Repeated attribute %.*S", p->namelen, p->name);
1110 
1111  if(!(a = Malloc(sizeof(*a))))
1112  return error(p, "System error");
1113 
 /* Link the attribute into the bit before parsing its value, so that it
    is reachable from the bit's attribute list if a later step fails. */
1114  a->value = 0; /* in case of error */
1115  a->next = p->xbit.attributes;
1116  p->xbit.attributes = a;
1117  a->definition = def;
1118 
1119  skip_whitespace(s);
1120  require(expect(p, '=', "after attribute name"));
1121 
1122  skip_whitespace(s);
 /* Peek at the first character of the value to see if it is quoted. */
1123  c = get(s);
1124  unget(s);
1125  switch(c)
1126  {
1127  case '"':
1128  case '\'':
1129  a->quoted = 1;
1130  require(parse_string(p, "in attribute value",
1131  a->definition->type == AT_cdata ? LT_cdata_attr :
1132  LT_tok_attr));
 /* The attribute takes over the buffer filled by parse_string. */
1133  a->value = p->pbuf;
1134  Consume(p->pbuf);
1135  break;
1136  default:
 /* NOTE(review): listing line 1137 is missing; it was presumably a
    flag test making the following error conditional.  As shown the
    return is unconditional and the rest of the case is unreachable. */
1138  return error(p, "Value of attribute is unquoted");
1139  a->quoted = 0;
1140  require(parse_nmtoken(p, "in unquoted attribute value"));
1141  CopyName(a->value);
1142  break;
1143  }
1144 
1145  return 0;
1146 }
1147 
1148 static int transcribe(Parser p, int back, int count)
1149 {
1150  ExpandBuf(p->pbuf, p->pbufnext + count);
1151  memcpy(p->pbuf + p->pbufnext,
1152  p->source->line + p->source->next - back,
1153  count * sizeof(Char));
1154  p->pbufnext += count;
1155  return 0;
1156 }
1157 
/*
 * Collect character data into the scratch buffer and return it as an
 * XBIT_pcdata bit.  The line buffer is scanned directly: `next' is the
 * scan position and `count' is the number of characters scanned but not
 * yet copied; they are copied with transcribe() whenever the scan is
 * interrupted (end of line, markup, reference).  Returns 0 on success or a
 * negative value on error.
 * NOTE(review): listing lines 1238, 1265 and 1282 are missing from this
 * copy; see the notes in the '&' case.
 */
1158 /* Called after pushing back the first character of the pcdata */
1159 
1160 static int parse_pcdata(Parser p)
1161 {
1162  int count = 0;
1163  InputSource s;
1164  Char *buf;
1165  int next, buflen;
1166 
1167  if(p->state <= PS_prolog2)
1168  return error(p, "Character data not allowed in prolog");
1169  if(p->state == PS_epilog)
1170  return error(p, "Character data not allowed after body");
1171 
1172  s = p->source;
1173  buf = s->line;
1174  next = s->next;
1175  buflen = s->line_length;
1176 
1177  p->pbufnext = 0;
1178 
1179  while(1)
1180  {
1181  if(next == buflen)
1182  {
 /* End of the current line: save what has been scanned, then move
    on to the next line (or out of the entity). */
1183  s->next = next;
1184  if(count > 0)
1185  {
1186  require(transcribe(p, count, count));
1187  }
1188  count = 0;
1189  if(at_eoe(s))
1190  {
1191  if(!ParserGetFlag(p, MergePCData))
1192  goto done;
1193  else
1194  pop_while_at_eoe(p);
1195  }
 /* Reload the scan variables; the source may have changed. */
1196  s = p->source;
1197  buf = s->line;
1198  next = s->next;
1199  buflen = s->line_length;
1200  if(next == buflen)
1201  goto done; /* must be EOF */
1202  }
1203 
1204  switch(buf[next++])
1205  {
1206  case '<':
1207  if(!ParserGetFlag(p, XMLLessThan))
1208  {
1209  /* In nSGML, don't recognise < as markup unless it looks ok */
1210  if(next == buflen)
1211  goto deflt;
1212  if(buf[next] != '!' && buf[next] != '/' && buf[next] != '?' &&
1213  !is_xml_namestart(buf[next]))
1214  goto deflt;
1215  }
1216  s->next = next;
1217  if(count > 0)
1218  {
 /* count+1: the '<' just scanned is not part of the data. */
1219  require(transcribe(p, count+1, count));
1220  }
1221  count = 0;
 /* When comments are not being returned, a comment is skipped here
    and the data continues after it; any other markup ends the
    pcdata with the '<' left unread. */
1222  if(!ParserGetFlag(p, ReturnComments) &&
1223  buflen >= next + 3 &&
1224  buf[next] == '!' && buf[next+1] == '-' && buf[next+2] == '-')
1225  {
1226  s->next = next + 3;
1227  require(parse_comment(p, 1));
1228  buflen = s->line_length;
1229  next = s->next;
1230  }
1231  else
1232  {
1233  s->next = next-1;
1234  goto done;
1235  }
1236  break;
1237  case '&':
 /* NOTE(review): listing line 1238 is missing; it was presumably a
    condition guarding this goto.  As shown the goto is unconditional
    and the rest of this case is unreachable. */
1239  goto deflt;
1240  if(!ParserGetFlag(p, MergePCData) &&
1241  (p->pbufnext > 0 || count > 0))
1242  {
1243  /* We're returning references as separate bits, and we've
1244  come to one, and we've already got some data to return,
1245  so return what we've got and get the reference next time. */
1246 
1247  s->next = next-1;
1248  if(count > 0)
1249  {
1250  require(transcribe(p, count, count));
1251  }
1252  goto done;
1253  }
1254  if(buflen >= next+1 && buf[next] == '#')
1255  {
1256  /* It's a character reference */
1257 
1258  s->next = next+1;
1259  if(count > 0)
1260  {
 /* count+2: skip back over the "&#" just scanned. */
1261  require(transcribe(p, count+2, count));
1262  }
1263  count = 0;
 /* NOTE(review): listing line 1265, the rest of this call's
    argument list, is missing. */
1264  require(parse_character_reference(p,
1266  next = s->next;
1267 
1268  if(!ParserGetFlag(p, MergePCData))
1269  goto done;
1270  }
1271  else
1272  {
1273  /* It's a general entity reference */
1274 
1275  s->next = next;
1276  if(count > 0)
1277  {
1278  require(transcribe(p, count+1, count));
1279  }
1280  count = 0;
 /* NOTE(review): listing line 1282, the middle argument of this
    call, is missing. */
1281  require(parse_reference(p, 0,
1283  1));
 /* Reload the scan variables after the reference has been parsed. */
1284  s = p->source;
1285  buf = s->line;
1286  buflen = s->line_length;
1287  next = s->next;
1288 
1289  if(!ParserGetFlag(p, MergePCData))
1290  goto done;
1291  }
1292  break;
1293  case ']':
1294  if(ParserGetFlag(p, XMLMiscWFErrors) &&
1295  buflen >= next + 2 &&
1296  buf[next] == ']' && buf[next+1] == '>')
1297  return error(p, "Illegal character sequence ']]>' in pcdata");
1298  /* fall through */
1299  default:
1300  deflt:
1301  count++;
1302  break;
1303  }
1304  }
1305 
1306  done:
 /* Terminate the text and hand the buffer over to the bit.
    NOTE(review): this writes through p->pbuf without a check; it relies
    on an earlier transcribe()/reference having allocated the buffer. */
1307  p->pbuf[p->pbufnext++] = 0;
1308  p->xbit.type = XBIT_pcdata;
1309  p->xbit.pcdata_chars = p->pbuf;
1310  Consume(p->pbuf);
1311 
1312  return 0;
1313 }
1314 
1315 /* Called after reading '<!--'. Won't go over an entity end. */
1316 
1317 static int parse_comment(Parser p, int skip)
1318 {
1319  InputSource s = p->source;
1320  int c, c1=0, c2=0;
1321  int count = 0;
1322 
1323  if(!skip)
1324  p->pbufnext = 0;
1325 
1326  while((c = get(s)) != XEOE)
1327  {
1328  count++;
1329  if(c1 == '-' && c2 == '-')
1330  {
1331  if(c == '>')
1332  break;
1333  unget(s); /* For error position */
1334  return error(p, "-- in comment");
1335  }
1336 
1337  if(at_eol(s))
1338  {
1339  if(!skip)
1340  {
1341  require(transcribe(p, count, count));
1342  }
1343  count = 0;
1344  }
1345  c2 = c1; c1 = c;
1346  }
1347 
1348  if(c == XEOE)
1349  return error(p, "EOE in comment");
1350 
1351  if(skip)
1352  return 0;
1353 
1354  require(transcribe(p, count, count-3));
1355  p->pbuf[p->pbufnext++] = 0;
1356  p->xbit.type = XBIT_comment;
1357  p->xbit.comment_chars = p->pbuf;
1358  Consume(p->pbuf);
1359 
1360  return 0;
1361 }
1362 
/*
 * Parse a processing instruction.  The message passed to parse_name
 * ("after <?") indicates that the caller has already consumed "<?".
 *
 * The PI name is copied into p->xbit.pi_name; the text following the name
 * is accumulated in p->pbuf.  On success the xbit is marked XBIT_pi,
 * pi_chars receives the buffer, and 0 is returned.  Failures are reported
 * through error() and require().
 */
1363  static int parse_pi(Parser p)
1364 {
1365  InputSource s = p->source;
1366  int c, c1=0;
1367  int count = 0;
1368  Char xml[] = {'x', 'm', 'l', 0};
1369 
1370  require(parse_name(p, "after <?"));
1371  CopyName(p->xbit.pi_name);
1372 
1373  p->pbufnext = 0;
1374 
/* A PI whose name matches "xml" case-insensitively is a misplaced or
   wrong-case XML declaration.
   NOTE(review): the embedded listing numbers skip from 1376 to 1378 and the
   `else` below has no visible `if`; the condition choosing between error
   and warning appears to be missing from this extract. */
1375  if(Strcasecmp(p->xbit.pi_name, xml) == 0)
1376  {
1378  return error(p, "Misplaced or wrong-case xml declaration");
1379  else
1380  warn(p, "Misplaced or wrong-case xml declaration; treating as PI");
1381  }
1382 
1383  /* Empty PI? */
1384 
/* The terminator is "?>" when the XMLPiEnd flag is set, otherwise ">" */
1385  if(looking_at(p, ParserGetFlag(p, XMLPiEnd) ? "?>" : ">"))
1386  {
1387  ExpandBuf(p->pbuf, 0);
1388  goto done;
1389  }
1390 
1391  /* If non-empty, must be white space after name */
1392 
1393  c = get(s);
1394  if(c == XEOE || !is_xml_whitespace(c))
1395  return error(p, "Expected whitespace after PI name");
1396  skip_whitespace(s);
1397 
/* Scan to the terminator; c1 remembers the previous character so that
   "?>" can be recognised.  Text is transcribed at each end of line. */
1398  while((c = get(s)) != XEOE)
1399  {
1400  count++;
1401  if(c == '>' &&
1402  (!ParserGetFlag(p, XMLPiEnd) || c1 == '?'))
1403  break;
1404  if(at_eol(s))
1405  {
1406  require(transcribe(p, count, count));
1407  count = 0;
1408  }
1409  c1 = c;
1410  }
1411 
1412  if(c == XEOE)
1413  return error(p, "EOE in PI");
1414 
/* Drop the terminator (2 characters for "?>", 1 for ">") from the copy */
1415  require(transcribe(p, count, count-(ParserGetFlag(p, XMLPiEnd) ? 2 : 1)));
1416 done:
1417  p->pbuf[p->pbufnext++] = 0;
1418  p->xbit.type = XBIT_pi;
1419  p->xbit.pi_chars = p->pbuf;
1420  Consume(p->pbuf);
1421 
1422  return 0;
1423 }
1424 
/*
 * Parse a quoted literal into p->pbuf.
 *
 * where - text inserted into error messages to say what was being parsed
 * type  - kind of literal; the code below distinguishes LT_plain,
 *         LT_entity, LT_tok_attr and LT_cdata_attr
 *
 * The literal must open with ' or " and ends at the matching quote, which
 * is only recognised while reading from the source the literal started in.
 * The result is left NUL-terminated in p->pbuf.  Returns 0 on success;
 * failures are reported through error() and require().
 *
 * NOTE(review): several lines are absent from this extract (the embedded
 * listing numbers skip at 1451, 1467, 1507, 1522, 1527-1528 and 1554), so
 * some conditions below are visibly truncated.
 */
1425  static int parse_string(Parser p, const char8 *where, enum literal_type type)
1426 {
1427  int c, quote;
1428  int count = 0;
1429  InputSource start_source, s;
1430 
1431  s = start_source = p->source;
1432 
1433  quote = get(s);
1434  if(quote != '\'' && quote != '"')
1435  {
1436  unget(s); /* For error position */
1437  return error(p, "Expected quoted string %s, but got %s",
1438  where, escape(quote));
1439  }
1440 
1441  p->pbufnext = 0;
1442 
1443  while(1)
1444  {
1445  switch(c = get(s))
1446  {
/* White-space characters: either kept as ordinary data, or flushed and
   replaced by a single space.  The last operand of the condition is
   missing from this extract (listing line 1451). */
1447  case '\r':
1448  case '\n':
1449  case '\t':
1450  if(type == LT_plain || type == LT_entity ||
1452  {
1453  count++;
1454  break;
1455  }
1456  if(count > 0)
1457  {
1458  require(transcribe(p, count+1, count));
1459  }
1460  count = 0;
1461  ExpandBuf(p->pbuf, p->pbufnext+1);
1462  p->pbuf[p->pbufnext++] = ' ';
1463  break;
1464 
/* '<' can be rejected in attribute values; the rest of the condition is
   missing from this extract (listing line 1467). */
1465  case '<':
1466  if((type == LT_tok_attr || type == LT_cdata_attr) &&
1468  return error(p, "Illegal character '<' %s", where);
1469  count++;
1470  break;
1471 
/* End of entity: an error in the source the literal started in, otherwise
   flush what we have and pop back to the enclosing source. */
1472  case XEOE:
1473  if(s == start_source)
1474  {
1475  return error(p, "Quoted string goes past entity end");
1476  }
1477  if(count > 0)
1478  {
1479  require(transcribe(p, count, count));
1480  }
1481  count = 0;
1482  ParserPop(p);
1483  s = p->source;
1484  break;
1485 
/* '%' is only special for LT_entity literals, where it introduces a
   parameter-entity reference (refused when external_pe_depth is 0). */
1486  case '%':
1487  if(type != LT_entity)
1488  {
1489  count++;
1490  break;
1491  }
1492  if(count > 0)
1493  {
1494  require(transcribe(p, count+1, count));
1495  }
1496  count = 0;
1497  if(p->external_pe_depth == 0)
1498  {
1499  unget(s); /* For error position */
1500  return error(p, "PE ref not allowed here in internal subset");
1501  }
1502  require(parse_reference(p, 1, 1, 1));
1503  s = p->source;
1504  break;
1505 
/* '&': character or general entity reference.  The guard for the
   `goto deflt` (listing line 1507) and parts of the two calls below
   (listing lines 1522, 1527-1528) are missing from this extract. */
1506  case '&':
1508  goto deflt;
1509  if(type == LT_plain)
1510  {
1511  count++;
1512  break;
1513  }
1514 
1515  if(count > 0)
1516  {
1517  require(transcribe(p, count+1, count));
1518  }
1519  count = 0;
1520  if(looking_at(p, "#"))
1521  require(parse_character_reference(p,
1523  else
1524  {
1525  require(parse_reference(p, 0,
1526  type != LT_entity &&
1529  s = p->source;
1530  }
1531  break;
1532 
1533  default:
1534  deflt:
/* The closing quote only counts in the source the literal started in */
1535  if(c == quote && p->source == start_source)
1536  goto done;
1537  count++;
1538  }
1539 
/* Flush pending characters at each end of line */
1540  if(at_eol(s) && count > 0)
1541  {
1542  require(transcribe(p, count, count));
1543  count = 0;
1544  }
1545  }
1546 
1547 done:
1548  if(count > 0)
1549  require(transcribe(p, count+1, count));
1550  else
1551  ExpandBuf(p->pbuf, p->pbufnext+1);
1552  p->pbuf[p->pbufnext++] = 0;
1553 
/* In-place space normalisation of p->pbuf.  The statement that guards
   this block (listing line 1554) is missing from this extract. */
1555  {
1556  Char *old, *new;
1557 
1558  new = old = p->pbuf;
1559 
1560  /* Maybe skip leading whitespace */
1561 
1562  while(*old == ' ')
1563  old++;
1564 
1565  /* Translate whitespace to spaces, maybe compressing */
1566 
1567  for( ; *old; old++)
1568  {
1569  if(*old == ' ')
1570  {
1571  /* NB can't be at start because we skipped whitespace */
1572  if(type == LT_tok_attr && new[-1] == ' ')
1573  ;
1574  else
1575  *new++ = ' ';
1576  }
1577  else
1578  *new++ = *old;
1579  }
1580 
1581  /* Maybe trim trailing space (only one possible) */
1582 
1583  if(new > p->pbuf && new[-1] == ' ')
1584  new--;
1585 
1586  *new = 0;
1587  }
1588 
1589  return 0;
1590 }
1591 
/*
 * Parse a document type declaration: the name, an optional external
 * identifier, an optional internal subset in [ ... ], and the closing '>'.
 *
 * The external identifier becomes an external entity and the internal
 * subset text becomes an internal entity; both are stored in p->dtd along
 * with the name.  When the TrustSDD flag is set the parts are parsed with
 * ParseDtd (the external part only if p->standalone is not SDD_yes).
 *
 * On success p->xbit is replaced by a copy taken on entry with its type
 * set to XBIT_dtd, and 0 is returned.  When the declaration is misplaced
 * or p->dtd->name is already set, the result of parse(p) is returned.
 */
1592  static int parse_dtd(Parser p)
1593 {
1594  InputSource s = p->source;
1595  Entity parent = s->entity;
1596  Entity internal_part = 0, external_part = 0;
1597  Char *name;
1598  char8 *publicid = 0, *systemid = 0;
1599  struct xbit xbit;
1600 
1601  xbit = p->xbit; /* copy start position */
1602  xbit.type = XBIT_dtd;
1603 
1604  require(parse_name(p, "for name in dtd"));
1605  CopyName(name);
1606  maybe_uppercase(p, name);
1607 
1608  skip_whitespace(s);
/* NOTE(review): one argument line of this call (listing line 1610) is
   missing from this extract. */
1609  if ( parse_external_id(p, 0, &publicid, &systemid,
1611  ParserGetFlag(p, XMLExternalIDs)) < 0) {
1612  Free(name);
1613  return -1;
1614  }
1615 
1616  if(systemid || publicid)
1617  {
1618  external_part = NewExternalEntity("", publicid, systemid, 0, parent);
1619  if(!external_part)
1620  {
1621  Free(name);
1622  return error(p, "System error");
1623  }
1624  skip_whitespace(s);
1625  }
1626 
1627  if(looking_at(p, "["))
1628  {
/* Remember where the internal subset starts, for the new entity */
1629  int line = s->line_number, cpos = s->next;
1630 
1631  if (read_markupdecls(p) < 0) {
1632  Free(name);
1633  return -1;
1634  }
1635  skip_whitespace(s);
1636  internal_part = NewInternalEntity("", p->pbuf, parent, line, cpos, 1);
1637  Consume(p->pbuf);
1638  if(!internal_part)
1639  {
1640  Free(name);
1641  FreeEntity(external_part);
1642  return error(p, "System error");
1643  }
1644  }
1645 
/* NOTE(review): on failure this returns without freeing name or the two
   entity parts - confirm whether that is intended. */
1646  require(expect(p, '>', "at end of dtd"));
1647 
/* A DOCTYPE is only accepted in state PS_prolog1 */
1648  if(p->state == PS_prolog1)
1649  p->state = PS_prolog2;
1650  else
1651  {
1652  Free(name);
1653  FreeEntity(external_part);
1654  FreeEntity(internal_part);
1655 
/* NOTE(review): listing line 1656 is missing from this extract; as shown
   the return below is unconditional, so a guarding condition has
   presumably been lost. */
1657  return error(p, "Misplaced or repeated DOCTYPE declaration");
1658 
1659  warn(p, "Misplaced or repeated DOCTYPE declaration");
1660  /* Ignore it and return the next bit */
1661  return parse(p);
1662  }
1663 
1664  if(p->dtd->name)
1665  {
1666  Free(name);
1667  FreeEntity(external_part);
1668  FreeEntity(internal_part);
1669 
1670  /* This happens if we manually set the dtd */
1671  return parse(p);
1672  }
1673 
/* From here on the dtd owns name and both entity parts */
1674  p->dtd->name = name;
1675  p->dtd->internal_part = internal_part;
1676  p->dtd->external_part = external_part;
1677 
1678  if(ParserGetFlag(p, TrustSDD))
1679  {
1680  if(internal_part)
1681  {
1682  ParseDtd(p, internal_part);
1683  if(p->xbit.type == XBIT_error)
1684  return -1;
1685  }
1686  if(external_part && p->standalone != SDD_yes)
1687  {
1688  ParseDtd(p, external_part);
1689  if(p->xbit.type == XBIT_error)
1690  return -1;
1691  }
1692  }
1693 
1694  p->xbit = xbit;
1695  return 0;
1696 }
1697 
1698 static int read_markupdecls(Parser p)
1699 {
1700  InputSource s = p->source;
1701  int depth=1;
1702  int c, d, hyphens=0;
1703  int count = 0;
1704 
1705  p->pbufnext = 0;
1706 
1707  while(1)
1708  {
1709  c = get(s);
1710  if(c == XEOE)
1711  return error(p, "EOE in DTD");
1712  if(c == '-')
1713  hyphens++;
1714  else
1715  hyphens = 0;
1716 
1717  count++;
1718 
1719  switch(c)
1720  {
1721  case ']':
1722  if(--depth == 0)
1723  {
1724  count--; /* We don't want the final ']' */
1725  require(transcribe(p, count+1, count));
1726  p->pbuf[p->pbufnext++] = 0;
1727  return 0;
1728  }
1729  break;
1730 
1731  case '[':
1732  depth++;
1733  break;
1734 
1735  case '"':
1736  case '\'':
1737  while((d = get(s)) != XEOE)
1738  {
1739  count++;
1740  if(at_eol(s))
1741  {
1742  require(transcribe(p, count, count));
1743  count = 0;
1744  }
1745  if(d == c)
1746  break;
1747  }
1748  if(d == XEOE)
1749  return error(p, "EOE in DTD");
1750  break;
1751 
1752  case '-':
1753  if(hyphens < 2)
1754  break;
1755  hyphens = 0;
1756  while((d = get(s)) != XEOE)
1757  {
1758  count++;
1759  if(at_eol(s))
1760  {
1761  require(transcribe(p, count, count));
1762  count = 0;
1763  }
1764  if(d == '-')
1765  hyphens++;
1766  else
1767  hyphens = 0;
1768  if(hyphens == 2)
1769  break;
1770  }
1771  if(d == XEOE)
1772  return error(p, "EOE in DTD");
1773  hyphens = 0;
1774  break;
1775 
1776  default:
1777  break;
1778  }
1779 
1780  if(at_eol(s) && count > 0)
1781  {
1782  require(transcribe(p, count, count));
1783  count = 0;
1784  }
1785  }
1786 }
1787 
1788 static int process_nsl_decl(Parser p)
1789 {
1790  InputSource s = p->source;
1791  int c, count = 0;
1792 
1793  s->entity->ml_decl = ML_nsl;
1794 
1795  /* The default character encoding for nSGML files is ascii-ash */
1796  if(s->entity->encoding == CE_UTF_8)
1797  s->entity->encoding = CE_unspecified_ascii_superset;
1798 
1799  /* Syntax is <?NSL DDB unquoted-filename 0> */
1800 
1801  if(!looking_at(p, "DDB "))
1802  return error(p, "Expected \"DDB\" in NSL declaration");
1803 
1804  while(c = get(s), !is_xml_whitespace(c))
1805  switch(c)
1806  {
1807  case XEOE:
1808  return error(p, "EOE in NSL declaration");
1809 
1810  case '>':
1811  return error(p, "Syntax error in NSL declaration");
1812 
1813  default:
1814  count++;
1815  }
1816 
1817  p->pbufnext = 0;
1818  require(transcribe(p, count+1, count));
1819  p->pbuf[p->pbufnext++] = 0;
1820 
1821  skip_whitespace(s);
1822  if(!looking_at(p, "0>"))
1823  return error(p, "Expected \"0>\" at end of NSL declaration");
1824 
1825  if(!(s->entity->ddb_filename = strdup8(Chartochar8(p->pbuf))))
1826  return error(p, "System error");
1827 
1828  return 0;
1829 }
1830 
/*
 * Process the attributes of an XML declaration up to the closing "?>".
 *
 * Recognised attributes are "version", "encoding" and "standalone"; the
 * `which <= last` test detects repeated or misordered ones.  Effects:
 *   version    - validated and copied to s->entity->version_decl if unset
 *   encoding   - validated, looked up with FindEncoding, checked with
 *                EncodingsCompatible, recorded in s->entity->encoding_decl
 *   standalone - must be "yes" or "no"; sets p->standalone and
 *                s->entity->standalone_decl
 *
 * Returns 0 on success; failures are reported through error() and
 * require().
 *
 * NOTE(review): listing lines 1837 and 1858 are missing from this extract.
 * `enc` is used below but its declaration is not visible, and the
 * condition in front of the "Repeated or misordered" error is absent.
 */
1831  static int process_xml_decl(Parser p)
1832 {
1833  InputSource s = p->source;
1834  enum {None, V, E, S} which, last = None;
1835  Char *Value, *cp;
1836  char8 *value;
1838  int c;
1839 
1840  s->entity->ml_decl = ML_xml;
1841 
1842  /* XXX Should save the string buffer because it may already be in use */
1843 
1844  while(!looking_at(p, "?>"))
1845  {
1846  if(looking_at(p, "version"))
1847  which = V;
1848  else if(looking_at(p, "encoding"))
1849  which = E;
1850  else if(looking_at(p, "standalone"))
1851  which = S;
1852  else
1853  return error(p, "Expected \"version\", \"encoding\" or "
1854  "\"standalone\" in XML declaration");
1855 
/* The enum order V < E < S encodes the required attribute order */
1856  if(which <= last)
1857  {
1859  return error(p, "Repeated or misordered attributes "
1860  "in XML declaration");
1861  warn(p, "Repeated or misordered attributes in XML declaration");
1862  }
1863  last = which;
1864 
1865  skip_whitespace(s);
1866  require(expect(p, '=', "after attribute name in XML declaration"));
1867  skip_whitespace(s);
1868 
1869  require(parse_string(p, "for attribute value in XML declaration",
1870  LT_plain));
1871 
/* The value is read from p->pbuf, where parse_string left it */
1872  maybe_uppercase(p, p->pbuf);
1873  Value = p->pbuf;
1874 
1875  if(which == E)
1876  {
/* Encoding names: a letter, then letters, digits, '.', '_' or '-' */
1877  if(!is_ascii_alpha(Value[0]))
1878  return error(p, "Encoding name does not begin with letter");
1879  for(cp=Value+1; *cp; cp++)
1880  if(!is_ascii_alpha(*cp) && !is_ascii_digit(*cp) &&
1881  *cp != '.' && *cp != '_' && *cp != '-')
1882  return error(p, "Illegal character %s in encoding name",
1883  escape(*cp));
1884 
1885  value = Chartochar8(Value);
1886 
1887  enc = FindEncoding(value);
1888  if(enc == CE_unknown)
1889  return error(p, "Unknown declared encoding %s", value);
1890 
/* enc is passed both by value and by address, so EncodingsCompatible may
   replace it */
1891  if(EncodingsCompatible(p->source->entity->encoding, enc, &enc))
1892  {
1893 #if CHAR_SIZE == 8
1894  /* We ignore the declared encoding in 8-bit mode,
1895  and treat it as a random ascii superset. */
1896 #else
1897  p->source->entity->encoding = enc;
1898 #endif
1899  }
1900  else
1901  return error(p, "Declared encoding %s is incompatible with %s "
1902  "which was used to read it",
1903  CharacterEncodingName[enc],
1904  CharacterEncodingName[p->source->entity->encoding]);
1905 
1906  s->entity->encoding_decl = enc;
1907  }
1908 
1909  if(which == S)
1910  {
1911  value = Chartochar8(Value);
1912 
1913  if(str_maybecase_cmp8(p, value, "no") == 0)
1914  p->standalone = SDD_no;
1915  else if(str_maybecase_cmp8(p, value, "yes") == 0)
1916  p->standalone = SDD_yes;
1917  else
1918  return error(p, "Expected \"yes\" or \"no\" "
1919  "for standalone in XML declaration");
1920 
1921  s->entity->standalone_decl = p->standalone;
1922  }
1923 
1924  if(which == V)
1925  {
1926  for(cp=Value; *cp; cp++)
1927  if(!is_ascii_alpha(*cp) && !is_ascii_digit(*cp) &&
1928  *cp != '.' && *cp != '_' && *cp != '-' && *cp != ':')
1929  return error(p, "Illegal character %s in version number",
1930  escape(*cp));
1931 
/* Only the first version seen for this entity is recorded */
1932  if(!s->entity->version_decl)
1933  if(!(s->entity->version_decl = strdup8(Chartochar8(Value))))
1934  return error(p, "System error");
1935  }
1936 
/* After a value: either the '?' of "?>" (pushed back for the loop test)
   or white space.
   NOTE(review): c is passed to is_xml_whitespace without an XEOE check,
   unlike parse_pi - confirm this is safe at end of entity. */
1937  c = get(s);
1938  if(c == '?')
1939  unget(s);
1940  else if(!is_xml_whitespace(c))
1941  return error(p, "Expected whitespace or \"?>\" after attribute "
1942  "in XML declaration");
1943  skip_whitespace(s);
1944  }
1945  return 0;
1946 }
1947 
1948 static int parse_cdata(Parser p)
1949 {
1950  InputSource s = p->source;
1951  int c, c1=0, c2=0;
1952  int count = 0;
1953 
1954  if(p->state <= PS_prolog2)
1955  return error(p, "Cdata section not allowed in prolog");
1956  if(p->state == PS_epilog)
1957  return error(p, "Cdata section not allowed after body");
1958 
1959  p->pbufnext = 0;
1960 
1961  while((c = get(s)) != XEOE)
1962  {
1963  count++;
1964  if(c == '>' && c1 == ']' && c2 == ']')
1965  break;
1966  if(at_eol(s))
1967  {
1968  require(transcribe(p, count, count));
1969  count = 0;
1970  }
1971  c2 = c1; c1 = c;
1972  }
1973 
1974  if(c == XEOE)
1975  return error(p, "EOE in CData section");
1976 
1977  require(transcribe(p, count, count-3));
1978  p->pbuf[p->pbufnext++] = 0;
1979  p->xbit.type = XBIT_cdsect;
1980  p->xbit.cdsect_chars = p->pbuf;
1981  Consume(p->pbuf);
1982 
1983  return 0;
1984 }
1985 
1986 XBit ParseDtd(Parser p, Entity e)
1987 {
1988  InputSource source, save;
1989 
1990  if(e->type == ET_external && p->entity_opener)
1991  source = p->entity_opener(e, p->callback_arg);
1992  else
1993  source = EntityOpen(e);
1994  if(!source)
1995  {
1996  error(p, "Couldn't open dtd entity %s", EntityDescription(e));
1997  return &p->xbit;
1998  }
1999 
2000  save = p->source;
2001  p->source = 0;
2002  if(ParserPush(p, source) == -1)
2003  return &p->xbit;
2004 
2005  p->have_dtd = 1;
2006 
2007  p->external_pe_depth = (source->entity->type == ET_external);
2008 
2009  while(parse_markupdecl(p) == 0)
2010  ;
2011 
2012  p->external_pe_depth = 0;
2013 
2014  /* don't restore after error, so user can call ParserPerror */
2015  if(p->xbit.type != XBIT_error)
2016  {
2017  ParserPop(p); /* to free the input source */
2018  p->source = save;
2019  }
2020 
2021  return &p->xbit;
2022 }
2023 
2024 /*
2025  * Returns 0 normally, -1 if error, 1 at EOF.
2026  */
/*
 * Parse one markup declaration from the DTD: skips DTD white space,
 * records the start position in p->xbit, then dispatches on what follows
 * '<' (ELEMENT, ATTLIST, ENTITY, NOTATION, conditional section, PI or
 * comment).  PIs and comments are passed to p->dtd_callback when one is
 * set and freed otherwise.
 */
2027  static int parse_markupdecl(Parser p)
2028 {
2029  InputSource s;
2030  int c;
2031  int cur_line, cur_char;
2032  Entity cur_ent;
2033 
2034  if(p->state == PS_error)
2035  return error(p, "Attempt to continue reading DTD after error");
2036 
2037  clear_xbit(&p->xbit);
2038 
2039  require(skip_dtd_whitespace(p, 1)); /* allow PE even in internal subset */
/* p->source is read only after the skip above */
2040  s = p->source;
2041  SourcePosition(s, &p->xbit.entity, &p->xbit.byte_offset);
2042 
/* Start position of the declaration, passed on to parse_entity_decl */
2043  cur_ent = s->entity;
2044  cur_line = s->line_number;
2045  cur_char = s->next;
2046 
2047  c = get(s);
2048  switch(c)
2049  {
2050  case XEOE:
2051  p->xbit.type = XBIT_none;
2052  return 1;
2053  case '<':
2054  if(looking_at(p, "!ELEMENT"))
2055  {
2056  require(expect_dtd_whitespace(p, "after ELEMENT"));
2057  return parse_element_decl(p);
2058  }
2059  else if(looking_at(p, "!ATTLIST"))
2060  {
2061  require(expect_dtd_whitespace(p, "after ATTLIST"));
2062  return parse_attlist_decl(p);
2063  }
2064  else if(looking_at(p, "!ENTITY"))
2065  {
2066  require(expect_dtd_whitespace(p, "after ENTITY"));
2067  return parse_entity_decl(p, cur_ent, cur_line, cur_char);
2068  }
2069  else if(looking_at(p, "!NOTATION"))
2070  {
2071  require(expect_dtd_whitespace(p, "after NOTATION"));
2072  return parse_notation_decl(p);
2073  }
2074  else if(looking_at(p, "!["))
2075  return parse_conditional(p);
2076  else if(looking_at(p, "?"))
2077  {
2078  require(parse_pi(p));
2079  if(p->dtd_callback)
2080  p->dtd_callback(&p->xbit, p->callback_arg);
2081  else
2082  FreeXBit(&p->xbit);
2083  return 0;
2084  }
2085  else if(looking_at(p, "!--"))
2086  {
/* NOTE(review): listing line 2087 is missing from this extract; the
   `else` further down shows that an `if` choosing between returning the
   comment (skip = 0) and discarding it (skip = 1) belongs here. */
2088  {
2089  require(parse_comment(p, 0));
2090  if(p->dtd_callback)
2091  p->dtd_callback(&p->xbit, p->callback_arg);
2092  else
2093  FreeXBit(&p->xbit);
2094  return 0;
2095  }
2096  else
2097  return parse_comment(p, 1);
2098  }
2099  else
2100  return error(p, "Syntax error after < in dtd");
2101  default:
2102  unget(s); /* For error position */
2103  return error(p, "Expected \"<\" in dtd, but got %s", escape(c));
2104  }
2105 }
2106 
/*
 * Parse an entity reference (the name and the ';') and, if asked, start
 * reading from the referenced entity.
 *
 * pe             - non-zero for a parameter entity reference
 * expand         - zero: the reference text is transcribed unexpanded
 * allow_external - zero: a reference to an external entity is an error
 *
 * With expand set, the entity is looked up in p->dtd, checked against the
 * chain of open sources for recursion, opened, and pushed as the new
 * current source.  Returns 0 on success; failures are reported through
 * error() and require().
 */
2107  static int parse_reference(Parser p, int pe, int expand, int allow_external)
2108 {
2109  Entity e;
2110  InputSource s;
2111 
2112  require(parse_name(p, pe ? "for parameter entity" : "for entity"));
2113  require(expect(p, ';', "after entity name"));
2114 
/* Unexpanded: copy the introducer, the name and the ';' as they stand */
2115  if(!expand)
2116  return transcribe(p, 1 + p->namelen + 1, 1 + p->namelen + 1);
2117 
2118  e = FindEntityN(p->dtd, p->name, p->namelen, pe);
2119  if(!e)
2120  {
2121  Char *buf;
2122  Char *q;
2123  int i;
2124 
/* NOTE(review): listing line 2125 is missing from this extract; as shown
   the return below is unconditional, so a guarding condition has
   presumably been lost. */
2126  return error(p, "Undefined%s entity %.*S",
2127  pe ? " parameter" : "" ,
2128  p->namelen > 50 ? 50 : p->namelen, p->name);
2129 
2130  warn(p, "Undefined%s entity %.*S",
2131  pe ? " parameter" : "",
2132  p->namelen > 50 ? 50 : p->namelen, p->name);
2133 
2134  /* Fake a definition for it */
2135 
/* The replacement text is "&#38;" + name + ";": 5 characters, the name,
   the ';' and the terminating 0 */
2136  buf = Malloc((5 + p->namelen + 1 + 1) * sizeof(Char));
2137  if(!buf)
2138  return error(p, "System error");
2139  q = buf;
2140  *q++ = '&'; *q++ = '#'; *q++ = '3'; *q++ = '8'; *q++ = ';';
2141  for(i=0; i<p->namelen; i++)
2142  *q++ = p->name[i];
2143  *q++ = ';';
2144  *q++ = 0;
2145 
/* NOTE(review): buf is not freed here if NewInternalEntityN fails, nor e
   if DefineEntity fails - confirm who owns them on those paths. */
2146  if(!(e = NewInternalEntityN(p->name, p->namelen, buf, 0, 0, 0, 0)))
2147  return error(p, "System error");
2148  if(!DefineEntity(p->dtd, e, 0))
2149  return error(p, "System error");
2150  }
2151 
2152  if(!allow_external && e->type == ET_external)
2153  return error(p, "Illegal reference to external entity");
2154 
/* An entity already open somewhere up the source chain is a recursion */
2155  for(s = p->source; s; s = s->parent)
2156  if(s->entity == e)
2157  return error(p, "Recursive reference to entity \"%S\"", e->name);
2158 
2159  if(e->type == ET_external && p->entity_opener)
2160  s = p->entity_opener(e, p->callback_arg);
2161  else
2162  s = EntityOpen(e);
2163  if(!s)
2164  return error(p, "Couldn't open entity %S, %s",
2165  e->name, EntityDescription(e));
2166 
2167  require(ParserPush(p, s));
2168 
2169  return 0;
2170 }
2171 
/*
 * Parse a numeric character reference.  The transcribe length used in the
 * unexpanded case (2, plus 1 for 'x', plus the digits, plus 1) suggests
 * the caller has already consumed "&#".
 *
 * expand - zero: the reference text is transcribed unexpanded;
 *          non-zero: the character it denotes is appended to p->pbuf
 *          (as a surrogate pair for codes >= 0x10000 when CHAR_SIZE is
 *          not 8).
 *
 * Returns 0 on success; failures are reported through error().
 */
2172  static int parse_character_reference(Parser p, int expand)
2173 {
2174  InputSource s = p->source;
2175  int c, base = 10;
2176  int count = 0;
2177  unsigned int code = 0;
/* ch points at the first digit in the line buffer so that the digits can
   be converted after they have been scanned */
2178  Char *ch = s->line + s->next;
2179 
2180  if(looking_at(p, "x"))
2181  {
2182  ch++;
2183  base = 16;
2184  }
2185 
/* First pass: validate and count the digits up to the ';' */
2186  while((c = get(s)) != ';')
2187  {
2188  if((c >= '0' && c <= '9') ||
2189  (base == 16 && ((c >= 'A' && c <= 'F') ||
2190  (c >= 'a' && c <= 'f'))))
2191  count++;
2192  else
2193  {
2194  unget(s); /* For error position */
2195  return error(p,
2196  "Illegal character %s in base-%d character reference",
2197  escape(c), base);
2198  }
2199  }
2200 
2201  if(!expand)
2202  return transcribe(p, 2 + (base == 16) + count + 1,
2203  2 + (base == 16) + count + 1);
2204 
/* Second pass: convert the digits.
   NOTE(review): code is accumulated without an overflow check, so a very
   long digit string wraps modulo the range of unsigned int. */
2205  while(count-- > 0)
2206  {
2207  c = *ch++;
2208  if(c >= '0' && c <= '9')
2209  code = code * base + (c - '0');
2210  else if(c >= 'A' && c <= 'F')
2211  code = code * base + 10 + (c - 'A');
2212  else
2213  code = code * base + 10 + (c - 'a');
2214  }
2215 
/* NOTE(review): listing lines 2219 and 2228 are missing from this extract;
   each `else` below has no visible `if`, so the condition choosing between
   error and warning is absent in both branches. */
2216 #if CHAR_SIZE == 8
2217  if(code > 255 || !is_xml_legal(code))
2218  {
2220  return error(p, "0x%x is not a valid 8-bit XML character", code);
2221  else
2222  warn(p, "0x%x is not a valid 8-bit XML character; ignored", code);
2223  return 0;
2224  }
2225 #else
2226  if(!is_xml_legal(code))
2227  {
2229  return error(p, "0x%x is not a valid UTF-16 XML character", code);
2230  else
2231  warn(p, "0x%x is not a valid UTF-16 XML character; ignored", code);
2232  return 0;
2233  }
2234 
2235  if(code >= 0x10000)
2236  {
2237  /* Use surrogates */
2238 
2239  ExpandBuf(p->pbuf, p->pbufnext+2);
2240  code -= 0x10000;
2241 
/* High surrogate from the top bits, low surrogate from the low 10 bits */
2242  p->pbuf[p->pbufnext++] = (code >> 10) + 0xd800;
2243  p->pbuf[p->pbufnext++] = (code & 0x3ff) + 0xdc00;
2244 
2245  return 0;
2246  }
2247 #endif
2248 
2249  ExpandBuf(p->pbuf, p->pbufnext+1);
2250  p->pbuf[p->pbufnext++] = code;
2251 
2252  return 0;
2253 }
2254 
2255 /* Called after reading '<!ELEMENT ' */
2256 
/*
 * Parse the rest of an element declaration: the element name, then
 * EMPTY, ANY or a parenthesised content model, then '>'.
 *
 * In the active (#if 1) branch the content model is parsed with parse_cp,
 * validated with check_content_decl and turned back into a string with
 * stringify_cp.  The element type is CT_mixed when the model is a choice
 * whose first child is #PCDATA, otherwise CT_element.  The element is then
 * defined in p->dtd, or redefined if an existing definition is tentative.
 *
 * Returns 0 on success, -1 or the result of error() on failure.
 */
2257  static int parse_element_decl(Parser p)
2258 {
2259  Char *name;
2260  ContentType type;
2261  ElementDefinition def;
2262 #if 1
2263  ContentParticle cp;
2264 #else
2265  int c;
2266  Char pcdata[] = {'#','P','C','D','A','T','A',0};
2267 #endif
2268  Char *content = 0;
2269 
2270  require(parse_name(p, "for name in element declaration"));
2271  CopyName(name);
2272  maybe_uppercase(p, name);
2273 
2274  if (expect_dtd_whitespace(p, "after name in element declaration") <0) {
2275  Free(name);
2276  return -1;
2277  }
2278 
2279  if(looking_at(p, "EMPTY"))
2280  {
2281  type = CT_empty;
2282  content = 0;
2283  }
2284  else if(looking_at(p, "ANY"))
2285  {
2286  type = CT_any;
2287  content = 0;
2288  }
2289  else
2290 #if 1
2291  if(looking_at(p, "("))
2292  {
/* Put the '(' back: parse_cp looks for it itself */
2293  unget(p->source);
2294  if(!(cp = parse_cp(p)) ||
2295  check_content_decl(p, cp) < 0 ||
2296  !(content = stringify_cp(cp)))
2297  {
2298  FreeContentParticle(cp);
2299  Free(content);
2300  Free(name);
2301  return -1;
2302  }
2303 
2304  if(cp->type == CP_choice && cp->children[0]->type == CP_pcdata)
2305  type = CT_mixed;
2306  else
2307  type = CT_element;
2308  {
2309  }
/* Only the string form of the model is kept */
2310  FreeContentParticle(cp); /* XXX */
2311  }
2312  else
2313  {
2314  Free(name);
2315  return error(p, "Expected \"EMPTY\", \"ANY\", or \"(\" after name in "
2316  "element declaration");
2317  }
2318 #else
/* Disabled alternative: copies the raw text up to '>' instead of parsing
   the content model */
2319  {
2320  /* Don't really parse here... maybe improve sometime */
2321 
2322  int count = 0;
2323 
2324  p->pbufnext = 0;
2325 
2326  while((c = get(p->source)) != '>')
2327  {
2328  switch(c)
2329  {
2330  case XEOE:
2331  if(count > 0)
2332  require(transcribe(p, count, count));
2333  if(!p->source->parent)
2334  return error(p, "EOE in element declaration");
2335  ParserPop(p);
2336  count = 0;
2337  break;
2338  case '%':
2339  if(count > 0)
2340  require(transcribe(p, count+1, count));
2341  if(p->external_pe_depth == 0)
2342  {
2343  unget(p->source); /* For error position */
2344  return error(p,
2345  "PE ref not allowed here in internal subset");
2346  }
2347  require(parse_reference(p, 1, 1, 1));
2348  count = 0;
2349  break;
2350  default:
2351  count++;
2352  if(at_eol(p->source))
2353  {
2354  require(transcribe(p, count, count));
2355  count = 0;
2356  }
2357  }
2358  }
2359 
2360  unget(p->source);
2361  require(transcribe(p, count, count));
2362  p->pbuf[p->pbufnext++] = 0;
2363 
2364  if(Strstr(p->pbuf, pcdata))
2365  type = CT_mixed;
2366  else
2367  type = CT_element;
2368 
2369  content = p->pbuf;
2370  Consume(p->pbuf);
2371  }
2372 #endif
2373  if (skip_dtd_whitespace(p, p->external_pe_depth > 0) < 0) {
2374  Free(name);
2375  return -1;
2376  }
/* NOTE(review): on failure this returns without freeing name or content -
   confirm whether that is intended. */
2377  require(expect(p, '>', "at end of element declaration"));
2378 
2379  if((def = FindElement(p->dtd, name)))
2380  {
/* A tentative definition is upgraded to the real one */
2381  if(def->tentative)
2382  RedefineElement(def, type, content);
2383  else
2384  {
2385  Free(content);
/* NOTE(review): listing line 2386 is missing from this extract; the warn
   below may originally have been conditional. */
2387  warn(p, "Ignoring redeclaration of element %S", name);
2388  }
2389  }
2390  else
/* NOTE(review): name is not freed on this error path. */
2391  if (!DefineElement(p->dtd, name, type, content)) {
2392  return error(p, "System error");
2393  };
2394 
2395  Free(name);
2396 
2397  return 0;
2398 }
2399 
2400 /* Content model parsing */
2401 
2402 static ContentParticle parse_cp(Parser p)
2403 {
2404  ContentParticle cp;
2405 
2406  if(looking_at(p, "("))
2407  {
2408  if(!(cp = parse_choice_or_seq(p)))
2409  return 0;
2410  }
2411  else if(looking_at(p, "#PCDATA"))
2412  {
2413  if(!(cp = Malloc(sizeof(*cp))))
2414  {
2415  error(p, "System error");
2416  return 0;
2417  }
2418 
2419  cp->type = CP_pcdata;
2420  }
2421  else
2422  {
2423  if(parse_name(p, "in content declaration") < 0)
2424  return 0;
2425 
2426  if(!(cp = Malloc(sizeof(*cp))))
2427  {
2428  error(p, "System error");
2429  return 0;
2430  }
2431 
2432  cp->type = CP_name;
2433  CopyName0(cp->name);
2434  }
2435 
2436  if(looking_at(p, "*"))
2437  cp->repetition = '*';
2438  else if(looking_at(p, "+"))
2439  cp->repetition = '+';
2440  else if(looking_at(p, "?"))
2441  cp->repetition = '?';
2442  else
2443  cp->repetition = 0;
2444 
2445  return cp;
2446 }
2447 
2448 /* Called after '(' */
2449 
2450 static ContentParticle parse_choice_or_seq(Parser p)
2451 {
2452  ContentParticle cp, cp1;
2453 
2454 
2455  require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2456 
2457  if(!(cp1 = parse_cp(p)))
2458  return 0;
2459 
2460  require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2461 
2462  if(!(cp = parse_choice_or_seq_1(p, 1, 0)))
2463  FreeContentParticle(cp1);
2464  else
2465  cp->children[0] = cp1;
2466 
2467  return cp;
2468 }
2469 
2470 /* Called before '|', ',', or ')' */
2471 
2472 static ContentParticle parse_choice_or_seq_1(Parser p, int nchildren, char sep)
2473 {
2474  ContentParticle cp = 0, cp1;
2475  int nsep = get(p->source);
2476 
2477  if(nsep == ')')
2478  {
2479  /* We've reached the end */
2480 
2481  if(!(cp = Malloc(sizeof(*cp))) ||
2482  !(cp->children = Malloc(nchildren * sizeof(cp))))
2483  {
2484  Free(cp);
2485  error(p, "System error");
2486  return 0;
2487  }
2488 
2489  /* The standard does not specify whether '(foo)' is a choice or a
2490  sequence. We make it a choice so that (#PCDATA) comes out as
2491  a choice, like other mixed models. */
2492 
2493  cp->type = sep == ',' ? CP_seq : CP_choice;
2494  cp->nchildren = nchildren;
2495 
2496  return cp;
2497  }
2498 
2499  if(nsep != '|' && nsep != ',')
2500  {
2501  error(p, "Expected | or , or ) in content declaration, got %s",
2502  escape(nsep));
2503  return 0;
2504  }
2505 
2506  if(sep && nsep != sep)
2507  {
2508  error(p, "Content particle contains both | and ,");
2509  return 0;
2510  }
2511 
2512  require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2513 
2514  if(!(cp1 = parse_cp(p)))
2515  return 0;
2516 
2517  require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2518 
2519  if(!(cp = parse_choice_or_seq_1(p, nchildren+1, (char)nsep)))
2520  FreeContentParticle(cp1);
2521  else
2522  cp->children[nchildren] = cp1;
2523 
2524  return cp;
2525 }
2526 
2527 /* Check content particle matches Mixed or children */
2528 
2529 static int check_content_decl(Parser p, ContentParticle cp)
2530 {
2531  int i;
2532 
2533  if(cp->type == CP_choice && cp->children[0]->type == CP_pcdata)
2534  {
2535  for(i=1; i<cp->nchildren; i++)
2536  if(cp->children[i]->type != CP_name)
2537  return error(p, "Invalid mixed content declaration");
2538 
2539  if(cp->repetition != '*' &&
2540  !(cp->nchildren == 1 && cp->repetition == 0))
2541  return error(p, "Invalid mixed content declaration");
2542 
2543  return 0;
2544  }
2545  else
2546  return check_content_decl_1(p, cp);
2547 }
2548 
2549 static int check_content_decl_1(Parser p, ContentParticle cp)
2550 {
2551  int i;
2552 
2553  switch(cp->type)
2554  {
2555  case CP_pcdata:
2556  return error(p, "Misplaced #PCDATA in content declaration");
2557  case CP_seq:
2558  case CP_choice:
2559  for(i=0; i<cp->nchildren; i++)
2560  if(check_content_decl_1(p, cp->children[i]) < 0)
2561  return -1;
2562  return 0;
2563  default:
2564  return 0;
2565  }
2566 }
2567 
2568 /* Reconstruct the content model as a string */
2569 
2570 static Char *stringify_cp(ContentParticle cp)
2571 {
2572  int size = size_cp(cp);
2573  Char *s;
2574  FILE16 *f;
2575 
2576  if(!(s = Malloc((size+1) * sizeof(Char))) ||
2577  !(f = MakeFILE16FromString(s, (size + 1) * sizeof(Char), "w")))
2578  {
2579  Free(s);
2580  return 0;
2581  }
2582 
2583  print_cp(cp, f);
2584  s[size] = 0;
2585 
2586  Fclose(f);
2587 
2588  return s;
2589 }
2590 
2591 static void print_cp(ContentParticle cp, FILE16 *f)
2592 {
2593  int i;
2594 
2595  switch(cp->type)
2596  {
2597  case CP_pcdata:
2598  Fprintf(f, "#PCDATA");
2599  break;
2600  case CP_name:
2601  Fprintf(f, "%S", cp->name);
2602  break;
2603  case CP_seq:
2604  case CP_choice:
2605  Fprintf(f, "(");
2606  for(i=0; i<cp->nchildren; i++)
2607  {
2608  if(i != 0)
2609  Fprintf(f, cp->type == CP_seq ? "," : "|");
2610  print_cp(cp->children[i], f);
2611  }
2612  Fprintf(f, ")");
2613  break;
2614  }
2615 
2616  if(cp->repetition)
2617  Fprintf(f, "%c", cp->repetition);
2618 }
2619 
2620 static int size_cp(ContentParticle cp)
2621 {
2622  int i, s;
2623 
2624  switch(cp->type)
2625  {
2626  case CP_pcdata:
2627  s = 7;
2628  break;
2629  case CP_name:
2630  s = Strlen(cp->name);
2631  break;
2632  default:
2633  s = 2;
2634  for(i=0; i<cp->nchildren; i++)
2635  {
2636  if(i != 0)
2637  s++;
2638  s += size_cp(cp->children[i]);
2639  }
2640  break;
2641  }
2642 
2643  if(cp->repetition)
2644  s++;
2645 
2646  return s;
2647 }
2648 
2649 void FreeContentParticle(ContentParticle cp)
2650 {
2651  int i;
2652 
2653  if(!cp)
2654  return;
2655 
2656  switch(cp->type)
2657  {
2658  case CP_pcdata:
2659  break;
2660  case CP_name:
2661  Free(cp->name);
2662  break;
2663  case CP_seq:
2664  case CP_choice:
2665  for(i=0; i<cp->nchildren; i++)
2666  FreeContentParticle(cp->children[i]);
2667  Free(cp->children);
2668  break;
2669  }
2670 
2671  Free(cp);
2672 }
2673 
2674 /* Called after reading '<!ATTLIST ' */
2675 
2676 static int parse_attlist_decl(Parser p)
2677 {
2678  Char *name;
2679  ElementDefinition element;
2682  Char **allowed_values, *t;
2683  Char *default_value;
2684  int nvalues, i;
2685 
2686  require(parse_name(p, "for name in attlist declaration"));
2687  CopyName(name);
2688  maybe_uppercase(p, name);
2689 
2690  if(!(element = FindElement(p->dtd, name)))
2691  {
2692  if(!(element = TentativelyDefineElement(p->dtd, name)))
2693  return error(p, "System error");
2694  }
2695  Free(name);
2696 
2697  require(expect_dtd_whitespace(p,
2698  "after element name in attlist declaration"));
2699 
2700  while(!looking_at(p, ">"))
2701  {
2702  require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2703  require(parse_name(p, "for attribute in attlist declaration"));
2704  CopyName(name);
2705  maybe_uppercase(p, name);
2706 
2707  require(expect_dtd_whitespace(p, "after name in attlist declaration"));
2708 
2709  if(looking_at(p, "CDATA"))
2710  type = AT_cdata;
2711  else if(looking_at(p, "IDREFS"))
2712  type = AT_idrefs;
2713  else if(looking_at(p, "IDREF"))
2714  type = AT_idref;
2715  else if(looking_at(p, "ID"))
2716  type = AT_id;
2717  else if(looking_at(p, "ENTITIES"))
2718  type = AT_entities;
2719  else if(looking_at(p, "ENTITY"))
2720  type = AT_entity;
2721  else if(looking_at(p, "NMTOKENS"))
2722  type = AT_nmtokens;
2723  else if(looking_at(p, "NMTOKEN"))
2724  type = AT_nmtoken;
2725  else if(looking_at(p, "NOTATION"))
2726  type = AT_notation;
2727  else
2728  type = AT_enumeration;
2729 
2730  if(type != AT_enumeration)
2731  {
2732  if(expect_dtd_whitespace(p, "after attribute type") < 0) {
2733  Free(name);
2734  return -1;
2735  }
2736  }
2737 
2738  if(type == AT_notation || type == AT_enumeration)
2739  {
2740  require(expect(p, '(',
2741  "or keyword for type in attlist declaration"));
2742 
2743  nvalues = 0;
2744  p->pbufnext = 0;
2745  do
2746  {
2747  require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2748  if(type == AT_notation)
2749  require(parse_name(p,
2750  "for notation value in attlist declaration"));
2751  else
2752  require(parse_nmtoken(p,
2753  "for enumerated value in attlist declaration"));
2754  maybe_uppercase_name(p);
2755  ExpandBuf(p->pbuf, p->pbufnext + p->namelen + 1);
2756  memcpy(p->pbuf+p->pbufnext,
2757  p->name,
2758  p->namelen * sizeof(Char));
2759  p->pbuf[p->pbufnext + p->namelen] = 0;
2760  p->pbufnext += (p->namelen + 1);
2761  nvalues++;
2762  if (skip_dtd_whitespace(p, p->external_pe_depth > 0) < 0) {
2763  Free(name);
2764  return -1;
2765  }
2766  }
2767  while(looking_at(p, "|"));
2768 
2769  require(expect(p, ')',
2770  "at end of enumerated value list in attlist declaration"));
2771  if(expect_dtd_whitespace(p, "after enumerated value list "
2772  "in attlist declaration") < 0) {
2773  Free(name);
2774  return -1;
2775  }
2776 
2777  allowed_values = Malloc((nvalues+1)*sizeof(Char *));
2778  if(!allowed_values)
2779  return error(p, "System error");
2780  for(i=0, t=p->pbuf; i<nvalues; i++)
2781  {
2782  allowed_values[i] = t;
2783  while(*t++)
2784  ;
2785  }
2786  allowed_values[nvalues] = 0;
2787 
2788  Consume(p->pbuf);
2789  }
2790  else
2791  allowed_values = 0;
2792 
2793  if(looking_at(p, "#REQUIRED"))
2794  default_type = DT_required;
2795  else if(looking_at(p, "#IMPLIED"))
2796  default_type = DT_implied;
2797  else if(looking_at(p, "#FIXED"))
2798  {
2799  default_type = DT_fixed;
2800  if (expect_dtd_whitespace(p, "after #FIXED") <0) {
2801  Free(allowed_values);
2802  Free(name);
2803  return -1;
2804  }
2805  }
2806  else
2807  default_type = DT_none;
2808 
2809  if(default_type == DT_fixed || default_type == DT_none)
2810  {
2811  require(parse_string(p,
2812  "for default value in attlist declaration",
2813  type == AT_cdata ? LT_cdata_attr :
2814  LT_tok_attr));
2815  default_value = p->pbuf;
2816  Consume(p->pbuf);
2817  if(type != AT_cdata && type != AT_entity && type != AT_entities)
2818  maybe_uppercase(p, default_value);
2819  }
2820  else
2821  default_value = 0;
2822 
2823  if (skip_dtd_whitespace(p, p->external_pe_depth > 0) < 0 ) {
2824  Free(allowed_values);
2825  Free(name);
2826  return -1;
2827  }
2828 
2829  if(FindAttribute(element, name))
2830  {
2832  warn(p, "Ignoring redeclaration of attribute %S", name);
2833  if(allowed_values)
2834  {
2835  Free(allowed_values[0]);
2836  Free(allowed_values);
2837  }
2838  if(default_value)
2839  Free(default_value);
2840  }
2841  else
2842  if(!DefineAttribute(element, name, type, allowed_values,
2843  default_type, default_value))
2844  return error(p, "System error");
2845 
2846  Free(name);
2847  }
2848 
2849  return 0;
2850 }
2851 
2852 /* Used for external dtd part, entity definitions and notation definitions. */
2853 /* NB PE references are not allowed here (why not?) */
2854 
2855 static int parse_external_id(Parser p, int required,
2856  char8 **publicid, char8 **systemid,
2857  int preq, int sreq)
2858 {
2859  InputSource s = p->source;
2860  int c;
2861  Char *cp;
2862 
2863  *publicid = 0;
2864  *systemid = 0;
2865 
2866  if(looking_at(p, "SYSTEM"))
2867  {
2868  if(!sreq)
2869  {
2870  skip_whitespace(s);
2871  c = get(s); unget(s);
2872  if(c != '"' && c != '\'')
2873  return 0;
2874  }
2875  else
2876  require(expect_dtd_whitespace(p, "after SYSTEM"));
2877 
2878  require(parse_string(p, "for system ID", LT_plain));
2879  if(!(*systemid = strdup8(Chartochar8(p->pbuf))))
2880  return error(p, "System error");
2881  }
2882  else if(looking_at(p, "PUBLIC"))
2883  {
2884  if(!preq && !sreq)
2885  {
2886  skip_whitespace(s);
2887  c = get(s); unget(s);
2888  if(c != '"' && c != '\'')
2889  return 0;
2890  }
2891  else
2892  require(expect_dtd_whitespace(p, "after PUBLIC"));
2893 
2894  require(parse_string(p, "for public ID", LT_plain));
2895 
2896  for(cp=p->pbuf; *cp; cp++)
2897  if(!is_ascii_alpha(*cp) && !is_ascii_digit(*cp) &&
2898  strchr8("-'()+,./:=?;!*#@$_% \r\n", *cp) == 0)
2899  return error(p, "Illegal character %s in public id",
2900  escape(*cp));
2901 
2902  if(!(*publicid = strdup8(Chartochar8(p->pbuf))))
2903  return error(p, "System error");
2904 
2905  if(!sreq)
2906  {
2907  skip_whitespace(s);
2908  c = get(s); unget(s);
2909  if(c != '"' && c != '\'')
2910  return 0;
2911  }
2912  else
2913  require(expect_dtd_whitespace(p, "after public id"));
2914 
2915  require(parse_string(p, "for system ID", LT_plain));
2916  if(!(*systemid = strdup8(Chartochar8(p->pbuf))))
2917  return error(p, "System error");
2918  }
2919  else if(required)
2920  return error(p, "Missing or invalid external ID");
2921 
2922  return 0;
2923 }
2924 
2925 /* Called after reading '<!ENTITY ' */
2926 
2927 static int parse_entity_decl(Parser p, Entity ent, int line, int chpos)
2928 {
2929  Entity e, old;
2930  int pe, t;
2931  Char *name;
2932 
2933  pe = looking_at(p, "%"); /* If it were a PE ref, we would
2934  already have pushed it */
2935 
2936  require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2937  require(parse_name(p, "for name in entity declaration"));
2938  CopyName(name);
2939  if (expect_dtd_whitespace(p, "after name in entity declaration") <0) {
2940  Free(name);
2941  return -1;
2942  }
2943 
2944  if(looking_at(p, "'") || looking_at(p, "\""))
2945  {
2946  Char *value;
2947 
2948  unget(p->source);
2949  if(parse_string(p, "for value in entity declaration", LT_entity) <0) {
2950  Free(name);
2951  return -1;
2952  }
2953  value = p->pbuf;
2954  Consume(p->pbuf);
2955 
2956  if(!(e = NewInternalEntity(name, value, ent, line, chpos, 0)))
2957  return error(p, "System error");
2958  }
2959  else
2960  {
2961  char8 *publicid, *systemid;
2962  NotationDefinition notation = 0;
2963 
2964  if (parse_external_id(p, 1, &publicid, &systemid, 1, 1) < 0) {
2965  Free(name);
2966  return -1;
2967  }
2968 
2969  if ((t = skip_dtd_whitespace(p, p->external_pe_depth > 0)) < 0) {
2970  Free(name);
2971  return -1;
2972  }
2973  if(looking_at(p, "NDATA"))
2974  {
2975  if(t == 0)
2976  return error(p, "Whitespace missing before NDATA");
2977  if(pe)
2978  return error(p, "NDATA not allowed for parameter entity");
2979  if (expect_dtd_whitespace(p, "after NDATA") <0) {
2980  Free(name);
2981  Free(systemid);
2982  return -1;
2983  }
2984  require(parse_name(p, "for notation name in entity declaration"));
2985  maybe_uppercase_name(p);
2986  notation = FindNotationN(p->dtd, p->name, p->namelen);
2987  if(!notation)
2988  {
2989  notation =
2990  TentativelyDefineNotationN(p->dtd, p->name, p->namelen);
2991  if(!notation)
2992  return error(p, "System error");
2993  }
2994  }
2995 
2996  if(!(e = NewExternalEntity(name, publicid, systemid, notation, ent)))
2997  return error(p, "System error");
2998  }
2999 
3000  Free(name);
3001 
3002  require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3003  require(expect(p, '>', "at end of entity declaration"));
3004 
3005  if((old = FindEntity(p->dtd, e->name, pe)) &&
3006  old->parent != xml_builtin_entity)
3007  {
3009  warn(p, "Ignoring redefinition of%s entity %S",
3010  pe ? " parameter" : "", e->name);
3011  }
3012  else
3013  if(!DefineEntity(p->dtd, e, pe))
3014  return error(p, "System error");
3015 
3016  //Free(e);
3017  return 0;
3018 }
3019 
3020 /* Called after reading '<!NOTATION ' */
3021 
3022 static int parse_notation_decl(Parser p)
3023 {
3024  Char *name;
3025  char8 *publicid, *systemid;
3026  NotationDefinition def;
3027 
3028  require(parse_name(p, "for name in notation declaration"));
3029  CopyName(name);
3030  maybe_uppercase(p, name);
3031 
3032  if (expect_dtd_whitespace(p, "after name in notation declaration") < 0) {
3033  Free(name);
3034  return -1;
3035  }
3036 
3037  if (parse_external_id(p, 1, &publicid, &systemid, 1, 0) < 0) {
3038  Free(name);
3039  return -1;
3040  }
3041 
3042  if (skip_dtd_whitespace(p, p->external_pe_depth > 0) < 0) {
3043  Free(name);
3044  return -1;
3045  }
3046 
3047  if (expect(p, '>', "at end of notation declaration") < 0) {
3048  Free(name);
3049  return -1;
3050  }
3051 
3052  if((def = FindNotation(p->dtd, name)))
3053  {
3054  if(def->tentative)
3055  RedefineNotation(def, publicid, systemid);
3056  else
3058  {
3059  warn(p, "Ignoring redefinition of notation %S", name);
3060  if(publicid) Free(publicid);
3061  if(systemid) Free(systemid);
3062  }
3063  }
3064  else
3065  {
3066  if(!DefineNotation(p->dtd, name, publicid, systemid))
3067  return error(p, "System error");
3068  }
3069 
3070  Free(name);
3071 
3072  return 0;
3073 }
3074 
3075 static int parse_conditional(Parser p)
3076 {
3077  int depth=1;
3078 
3079  if(p->external_pe_depth == 0)
3080  return error(p, "Conditional section not allowed in internal subset");
3081 
3082  require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3083  if(looking_at(p, "INCLUDE"))
3084  {
3085  require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3086  require(expect(p, '[', "at start of conditional section"));
3087  require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3088  while(!looking_at(p, "]"))
3089  {
3090  switch(parse_markupdecl(p))
3091  {
3092  case 1:
3093  return error(p, "EOF in conditional section");
3094  case -1:
3095  return -1;
3096  }
3097  require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3098  }
3099 
3100  if(!looking_at(p, "]>"))
3101  return error(p, "]> required after ] in conditional section");
3102  }
3103  else if(looking_at(p, "IGNORE"))
3104  {
3105  /* Easy, because ]]> not even allowed in strings! */
3106 
3107  require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3108  require(expect(p, '[', "at start of conditional section"));
3109 
3110  while(depth > 0)
3111  {
3112  switch(get(p->source))
3113  {
3114  case XEOE:
3115  if(p->source->parent)
3116  ParserPop(p);
3117  else
3118  return error(p, "EOE in ignored conditional section");
3119  break;
3120  case '<':
3121  if(looking_at(p, "!["))
3122  depth++;
3123  break;
3124  case ']':
3125  if(looking_at(p, "]>"))
3126  depth--;
3127  }
3128  }
3129  }
3130  else
3131  return error(p, "INCLUDE or IGNORE required in conditional section");
3132 
3133  return 0;
3134 }
3135 
3136 static void maybe_uppercase(Parser p, Char *s)
3137 {
3139  while(*s)
3140  {
3141  *s = Toupper(*s);
3142  s++;
3143  }
3144 }
3145 
3146 static void maybe_uppercase_name(Parser p)
3147 {
3148  int i;
3149 
3151  for(i=0; i<p->namelen; i++)
3152  p->name[i] = Toupper(p->name[i]);
3153 }
3154 
3155 static int str_maybecase_cmp8(Parser p, const char8 *a, const char8 *b)
3156 {
3157  return
3158  ParserGetFlag(p, CaseInsensitive) ? strcasecmp8(a, b) : strcmp8(a, b);
3159 }
3160 
/* Return 1 if c is one of the letters 'A'..'Z' or 'a'..'z', else 0. */
static int is_ascii_alpha(int c)
{
    if(c >= 'A' && c <= 'Z')
        return 1;
    if(c >= 'a' && c <= 'z')
        return 1;
    return 0;
}
3165 
/* Return 1 if c is one of the digits '0'..'9', else 0. */
static int is_ascii_digit(int c)
{
    return !(c < '0' || c > '9');
}
3170 
3171 /* Error handling */
3172 
3173 static void verror(XBit bit, const char8 *format, va_list args)
3174 {
3175  /* yuk, but we don't want to fail if we can't allocate */
3176  static char8 message[400];
3177 
3178  /* Print message before freeing xbit, so we can print data from it */
3179  Vsprintf(message, CE_ISO_8859_1, format, args);
3180 
3181  FreeXBit(bit);
3182  bit->type = XBIT_error;
3183  bit->error_message = message;
3184 }
3185 
3186 static int error(Parser p, const char8 *format, ...)
3187 {
3188  va_list args;
3189 
3190  va_start(args, format);
3191  verror(&p->xbit, format, args);
3192 
3193  p->state = PS_error;
3194  va_end(args);
3195  return -1;
3196 }
3197 
3198 static void warn(Parser p, const char8 *format, ...)
3199 {
3200  va_list args;
3201  static struct xbit bit;
3202 
3203  va_start(args, format);
3204  verror(&bit, format, args);
3205 
3206  bit.type = XBIT_warning;
3207 
3208  if(p->warning_callback)
3209  p->warning_callback(&bit, p->callback_arg);
3210  else
3211  ParserPerror(p, &bit);
3212  va_end(args);
3213 }
3214 
XML_API int SourceLineAndChar(InputSource s, int *linenum, int *charnum)
Definition: input.c:118
Char * value
Definition: xmlparser.h:47
XML_API AttributeDefinition DefineAttributeN(ElementDefinition element, const Char *name, int namelen, AttributeType type, Char **allowed_values, DefaultType default_type, const Char *default_value)
Definition: dtd.c:582
XML_API int get_with_fill(InputSource s)
Definition: input.c:529
#define ExpandBuf(buf, sz)
Definition: xmlparser.c:111
#define is_xml_legal(c)
Definition: ctype16.h:47
STD_API FILE16 * Stderr
Definition: stdio16.c:123
#define DefineNotation(dtd, name, pub, sys)
Definition: dtd.h:248
XML_API void FreeEntity(Entity e)
Definition: dtd.c:250
InputSource source
Definition: rxp.c:24
void FreeParser(Parser p)
Definition: xmlparser.c:488
STD_API const char8 * CharacterEncodingName[CE_enum_count]
Definition: charset.c:170
struct attribute * next
Definition: xmlparser.h:49
InputSource EntityOpenerProc(Entity e, void *arg)
Definition: xmlparser.h:30
Definition: dtd.h:140
void ParserSetWarningCallback(Parser p, CallbackProc cb)
Definition: xmlparser.c:523
Definition: xmlparser.h:65
#define Chartochar8
Definition: string16.h:109
Definition: dtd.h:59
#define FindNotation(dtd, name)
Definition: dtd.h:252
#define require(x)
Definition: xmlparser.c:107
default_type
Definition: dtd.h:128
void ParserSetCallbackArg(Parser p, void *arg)
Definition: xmlparser.c:513
#define is_xml_namechar(c)
Definition: ctype16.h:49
void ParserPerror(Parser p, XBit bit)
Definition: xmlparser.c:772
STD_API int strcasecmp8(const char8 *, const char8 *)
Definition: string16.c:33
#define require0(x)
Definition: xmlparser.c:108
#define Strdup
Definition: string16.h:96
bool save(Lattice &lattice, EST_String filename)
Definition: dtd.h:141
#define Consume(buf)
Definition: xmlparser.c:110
#define NewInternalEntity(name, test, parent, l, l1, mat)
Definition: dtd.h:206
STD_API int Vsprintf(void *buf, CharacterEncoding enc, const char *format, va_list args)
Definition: stdio16.c:400
XML_API void determine_character_encoding(InputSource s)
Definition: input.c:480
#define strcmp8(s1, s2)
Definition: string16.h:52
#define FindElement(dtd, name)
Definition: dtd.h:223
XBit ReadXBit(Parser p)
Definition: xmlparser.c:633
int nchildren
Definition: xmlparser.h:75
void CallbackProc(XBit bit, void *arg)
Definition: xmlparser.h:29
XML_API const char8 * EntityDescription(Entity e)
Definition: dtd.c:303
void ParserPop(Parser p)
Definition: xmlparser.c:711
EST_Track error(EST_Track &ref, EST_Track &test, int relax=0)
enum attribute_type AttributeType
Definition: dtd.h:143
Definition: dtd.h:131
XML_API InputSource EntityOpen(Entity e)
Definition: input.c:68
XML_API Entity DefineEntity(Dtd dtd, Entity entity, int pe)
Definition: dtd.c:338
#define NewExternalEntity(name, pub, sys, nnot, parent)
Definition: dtd.h:204
void Free(void *mem)
Definition: system.c:35
#define FindEntity(dtd, name, pe)
Definition: dtd.h:208
Definition: dtd.h:139
Definition: dtd.h:131
XML_API ElementDefinition DefineElementN(Dtd dtd, const Char *name, int namelen, ContentType type, Char *content)
Definition: dtd.c:378
Definition: dtd.h:105
STD_API int Toupper(int c)
Definition: ctype16.c:52
Definition: dtd.h:141
Parser NewParser(void)
Definition: xmlparser.c:432
XML_API Dtd NewDtd(void)
Definition: dtd.c:93
void ParserSetFlag(Parser p, ParserFlag flag, int value)
Definition: xmlparser.c:756
enum content_type ContentType
Definition: dtd.h:107
void ParserSetDtdCallback(Parser p, CallbackProc cb)
Definition: xmlparser.c:518
#define DefineAttribute(element, name, type, all, dt, dv)
Definition: dtd.h:234
#define is_xml_whitespace(c)
Definition: ctype16.h:50
#define CopyName0(n)
Definition: xmlparser.c:116
void FreeXBit(XBit xbit)
Definition: xmlparser.c:300
STD_API void init_charset(void)
Definition: charset.c:233
#define TentativelyDefineElement(dtd, name)
Definition: dtd.h:221
int ParserInit(void)
Definition: xmlparser.c:137
AttributeDefinition NextAttributeDefinition(ElementDefinition element, AttributeDefinition previous)
Definition: dtd.c:661
XBit PeekXBit(Parser p)
Definition: xmlparser.c:643
#define XEOE
Definition: input.h:74
AttributeDefinition definition
Definition: xmlparser.h:46
enum default_type DefaultType
Definition: dtd.h:133
XBit ReadXTree(Parser p)
Definition: xmlparser.c:535
void FreeContentParticle(ContentParticle cp)
Definition: xmlparser.c:2649
XML_API NotationDefinition FindNotationN(Dtd dtd, const Char *name, int namelen)
Definition: dtd.c:768
f
Definition: EST_item_aux.cc:48
Definition: dtd.h:59
Definition: dtd.h:64
#define Strlen
Definition: string16.h:98
literal_type
Definition: xmlparser.c:95
const char8 * XBitTypeName[XBIT_enum_count]
Definition: xmlparser.c:118
XML_API Entity FindEntityN(Dtd dtd, const Char *name, int namelen, int pe)
Definition: dtd.c:354
enum xbit_type type
Definition: xmlparser.h:68
int expand
Definition: rxp.c:20
STD_API void init_stdio16(void)
Definition: stdio16.c:125
XBit ParseDtd(Parser p, Entity e)
Definition: xmlparser.c:1986
STD_API int EncodingsCompatible(CharacterEncoding enc1, CharacterEncoding enc2, CharacterEncoding *enc3)
Definition: charset.c:297
#define CopyName(n)
Definition: xmlparser.c:114
Definition: dtd.h:105
#define unget(s)
Definition: input.h:78
#define Strstr
Definition: string16.h:106
#define DefineElement(dtd, name, type, content)
Definition: dtd.h:219
void FreeXTree(XBit tree)
Definition: xmlparser.c:613
enum character_encoding CharacterEncoding
Definition: charset.h:61
LISP quote(LISP item)
Definition: siod.cc:252
XML_API ElementDefinition RedefineElement(ElementDefinition e, ContentType type, Char *content)
Definition: dtd.c:470
enum parser_flag ParserFlag
Definition: xmlparser.h:128
#define is_xml_namestart(c)
Definition: ctype16.h:48
Entity ParserRootEntity(Parser p)
Definition: xmlparser.c:508
#define FindAttribute(element, name)
Definition: dtd.h:236
STD_API int Fclose(FILE16 *file)
Definition: stdio16.c:319
XML_API ElementDefinition FindElementN(Dtd dtd, const Char *name, int namelen)
Definition: dtd.c:495
#define strchr8(s, c)
Definition: string16.h:50
STD_API int EncodingIsAsciiSuperset(CharacterEncoding enc)
Definition: charset.c:286
Definition: dtd.h:64
#define Strcasecmp
Definition: string16.h:104
char char8
Definition: charset.h:31
STD_API int Fprintf(FILE16 *file, const char *format,...)
Definition: stdio16.c:365
void ParserSetEntityOpener(Parser p, EntityOpenerProc opener)
Definition: xmlparser.c:528
int tree
Definition: rxp.c:21
InputSource ParserRootSource(Parser p)
Definition: xmlparser.c:498
STD_API char8 * strdup8(const char8 *s)
Definition: string16.c:77
STD_API CharacterEncoding FindEncoding(char8 *name)
Definition: charset.c:335
void * Malloc(int bytes)
Definition: system.c:19
XML_API NotationDefinition RedefineNotation(NotationDefinition n, const char8 *publicid, const char8 *systemid)
Definition: dtd.c:758
Definition: dtd.h:139
void * Realloc(void *mem, int bytes)
Definition: system.c:27
EST_Item * parent(const EST_Item *n)
return parent of n
int quoted
Definition: xmlparser.h:48
XML_API AttributeDefinition FindAttributeN(ElementDefinition element, const Char *name, int namelen)
Definition: dtd.c:637
#define ParserGetFlag(p, flag)
Definition: xmlparser.h:190
XML_API NotationDefinition TentativelyDefineNotationN(Dtd dtd, const Char *name, int namelen)
Definition: dtd.c:735
int ParserPush(Parser p, InputSource source)
Definition: xmlparser.c:656
Definition: dtd.h:105
STD_API FILE16 * MakeFILE16FromString(void *buf, long size, const char *type)
Definition: stdio16.c:727
#define at_eol(s)
Definition: input.h:76
XML_API void SourcePosition(InputSource s, Entity *entity, int *char_number)
Definition: input.c:157
STD_API void init_ctype16(void)
Definition: ctype16.c:26
XML_API Entity NewInternalEntityN(const Char *name, int namelen, const Char *text, Entity parent, int line_offset, int line1_char_offset, int matches_parent_text)
Definition: dtd.c:216