Edinburgh Speech Tools  2.1-release
XML_Parser.cc
Go to the documentation of this file.
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /*************************************************************************/
33  /* */
34  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35  /* -------------------------------------------------------------------- */
36  /* Recursive descent parsing skeleton. */
37  /* */
38  /*************************************************************************/
39 
40 #include "EST_error.h"
41 #include "XML_Parser.h"
42 #include "rxp.h"
43 
45 {
46 }
47 
49 {
50  known_ids.add_item(id_pattern, directory);
51 }
52 
54 {
55  EST_Litem *p;
56 
57  for(p=known_ids.head(); p != 0; p= p->next())
58  {
59  EST_String re(known_ids.key(p).tostring());
60  EST_String &pattern = known_ids.val(p);
61 
62  list.append(re);
63  list.append(pattern);
64  }
65 }
66 
67 XML_Parser *XML_Parser_Class::make_parser(InputSource source, Entity ent, void *data)
68 {
69  return new XML_Parser(*this, source, ent, data);
70 }
71 
73 {
74  return new XML_Parser(*this, source, NULL, data);
75 }
76 
77 
79  const EST_String desc,
80  void *data)
81 {
82  FILE16 *input16=MakeFILE16FromFILE(input, "r");
83 
84  if (input16==NULL) {
85  EST_sys_error("Can't open 16 bit '%s'", (const char *)desc);
86  return 0;
87  }
88 
89  SetCloseUnderlying(input16, 0);
90 
91  Entity ent = NewExternalEntity("",0,strdup8(desc),0,0);
92 
93  return make_parser(NewInputSource(ent, input16), ent, data);
94 }
95 
96 
98  void *data)
99 {
100  return make_parser(input, "<ANONYMOUS>", data);
101 }
102 
103 
105  void *data)
106 {
107  if ( filename == "-" )
108  return make_parser(stdin, data);
109 
110  FILE *input = fopen(filename, "r");
111 
112  if (input==NULL) {
113  EST_sys_error("Can't open '%s'", (const char *)filename);
114  return 0;
115  }
116 
117  FILE16 *input16=MakeFILE16FromFILE(input, "r");
118 
119  if (input16==NULL) {
120  EST_sys_error("Can't open 16 bit '%s'", (const char *)filename);
121  return 0;
122  }
123  SetCloseUnderlying(input16, 1);
124 
125  Entity ent = NewExternalEntity("",0,strdup8(filename),0,0);
126 
127  return make_parser(NewInputSource(ent, input16), data);
128 }
129 
130 InputSource XML_Parser_Class::try_and_open(Entity ent)
131 
132 {
133  EST_String id = ent->publicid?ent->publicid:ent->systemid;
134  EST_Litem *p;
135 
136  size_t starts[EST_Regex_max_subexpressions];
137  size_t ends[EST_Regex_max_subexpressions];
138  for (p = known_ids.head(); p != 0; p = p->next())
139  {
140  EST_Regex &re = known_ids.key(p);
141  EST_String pattern(known_ids.val(p));
142 
143  if (id.matches(re, 0, starts, ends))
144  {
145  EST_String res(pattern);
146  res.subst(id, starts, ends);
147 
148  FILE *f;
149  FILE16 *f16;
150  if((f = fopen(res, "r")))
151  {
152  if(!(f16 = MakeFILE16FromFILE(f, "r")))
153  return 0;
154  SetCloseUnderlying(f16, 1);
155 
156  return NewInputSource(ent, f16);
157  }
158  }
159  }
160 
161  return EntityOpen(ent);
162 }
163 
164 
165 InputSource XML_Parser_Class::open_entity(Entity ent, void *arg)
166 {
167  XML_Parser *parser = (XML_Parser *)arg;
168 
169  return parser->open(ent);
170 }
171 
172 // Default do-nothing callbacks.
173 
175  XML_Parser &p,
176  void *data)
177 { (void)c; (void)p; (void)data; }
178 
180  XML_Parser &p,
181  void *data)
182 { (void)c; (void)p; (void)data; }
183 
185  XML_Parser &p,
186  void *data,
187  const char *name,
188  XML_Attribute_List &attributes)
189 { (void)c; (void)p; (void)data; (void)name; (void)attributes; }
190 
192  XML_Parser &p,
193  void *data,
194  const char *name,
195  XML_Attribute_List &attributes)
196 { (void)c; (void)p; (void)data; (void)name; (void)attributes;
197  element_open(c, p, data, name, attributes);
198  element_close(c, p, data, name);
199 }
200 
202  XML_Parser &p,
203  void *data,
204  const char *name)
205 { (void)c; (void)p; (void)data; (void)name; }
206 
208  XML_Parser &p,
209  void *data,
210  const char *chars)
211 { (void)c; (void)p; (void)data; (void)chars; }
212 
214  XML_Parser &p,
215  void *data,
216  const char *chars)
217 { (void)c; (void)p; (void)data; (void)chars; }
218 
220  XML_Parser &p,
221  void *data,
222  const char *instruction)
223 { (void)c; (void)p; (void)data; (void)instruction; }
224 
226  XML_Parser &p,
227  void *data)
228 { (void)c; (void)p; (void)data; }
229 
231 {
232  return p.get_error();
233 }
234 
236  XML_Parser &p,
237  void *data,
238  EST_String message)
239 {
240  if (p.current_bit != NULL)
241  p.current_bit->error_message = message;
242  error(c, p, data);
243 }
244 
245  /*************************************************************************/
246  /* */
247  /* An actual parser. */
248  /* */
249  /*************************************************************************/
250 
252  InputSource s,
253  Entity ent,
254  void *d)
255 {
256  p_track_context = 0;
257  p_track_contents = 0;
258  current_bit = 0;
259  pclass=&pc;
260  source=s;
261  initial_entity=ent;
262  data=d;
263  p = NewParser();
266  ParserSetCallbackArg(p, (void *)this);
267 }
268 
270 {
271  if (initial_entity)
272  FreeEntity(initial_entity);
273  FreeDtd(p->dtd);
274  FreeParser(p);
275 }
276 
277 InputSource XML_Parser::open(Entity ent)
278 {
279  return pclass->try_and_open(ent);
280 }
281 
283 {
284 
285  if (p_track_context)
286  p_context.clear();
287 
288  if (ParserPush(p, source) == -1)
289  EST_error("XML Parser error in push");
290 
291  pclass->document_open(*pclass, *this, data);
292 
293  XBit bit;
294  while (1)
295  {
296  current_bit = bit = ReadXBit(p);
297  if (bit->type == XBIT_eof)
298  break;
299  else if (bit->type == XBIT_start || bit->type == XBIT_empty)
300  {
301  Attribute b;
302  XML_Attribute_List att(10);
303 
304  for (b=bit->attributes; b; b=b->next)
305  {
306  att.add_item(EST_String(b->definition->name), EST_String(b->value));
307  }
308 
309  if (bit->type == XBIT_start)
310  {
311  pclass->element_open(*pclass,
312  *this,
313  data,
314  bit->element_definition->name,
315  att
316  );
317  if (p_track_context)
318  {
319  EST_String nm(bit->element_definition->name);
320  p_context.push(nm);
321  }
322 
323  }
324  else
325  pclass->element(*pclass,
326  *this,
327  data,
328  bit->element_definition->name,
329  att
330  );
331  }
332  else if (bit->type == XBIT_end)
333  {
334  if (p_track_context)
335  p_context.pop();
336 
337  pclass->element_close(*pclass,
338  *this,
339  data,
340  bit->element_definition->name
341  );
342  }
343  else if (bit->type == XBIT_pcdata)
344  {
345  pclass->pcdata(*pclass,
346  *this,
347  data,
348  bit->pcdata_chars
349  );
350  }
351  else if (bit->type == XBIT_cdsect)
352  {
353  pclass->cdata(*pclass,
354  *this,
355  data,
356  bit->cdsect_chars
357  );
358  }
359  else if (bit->type == XBIT_pi)
360  {
361  pclass->processing(*pclass,
362  *this,
363  data,
364  bit->pi_chars
365  );
366  }
367  else if (bit->type == XBIT_error)
368  {
369  pclass->error(*pclass,
370  *this,
371  data);
372  break;
373  }
374  else
375  {
376  // ignore it
377  }
378  FreeXBit(bit);
379  current_bit=NULL;
380  }
381 
382  if (current_bit!=NULL)
383  {
384  FreeXBit(bit);
385  current_bit=NULL;
386  }
387 
388  pclass->document_close(*pclass, *this, data);
389 }
390 
392 {
393  p_track_context=flag;
394 }
395 
397 {
398  p_track_contents=flag;
399 }
400 
401 
402 // Stolen from xmlparser.c, will need to be tweaked for internal rxp changes.
404 {
405  int linenum, charnum;
406  InputSource s;
407  XBit bit = current_bit;
408 
409  if (!bit)
410  return "No Parse In Progress";
411 
412  p_error_message =
414  bit->type == XBIT_error ? "Error" : "Warning",
415  ": ",
416  bit->error_message?bit->error_message:"non XML error"
417  );
418 
419  for(s=p->source; s; s=s->parent)
420  {
421  if(s->entity->name)
422  {
423  p_error_message += " in entity \"";
424  p_error_message += s->entity->name;
425  p_error_message += "\"";
426  }
427  else
428  p_error_message += " in unnamed entity";
429 
430  switch(SourceLineAndChar(s, &linenum, &charnum))
431  {
432  case 1:
433  p_error_message += EST_String::cat(" at line ",
434  EST_String::Number(linenum+1),
435  " char ",
436  EST_String::Number(charnum+1),
437  " of ");
438  break;
439  case 0:
440  p_error_message += EST_String::cat(" defined at line ",
441  EST_String::Number(linenum+1),
442  " char ",
443  EST_String::Number(charnum+1),
444  " of ");
445  break;
446  case -1:
447  p_error_message += " defined in ";
448  break;
449  }
450 
451  p_error_message += EntityDescription(s->entity);
452  p_error_message += "\n";
453  }
454 
455  return (const char *)p_error_message;
456 }
457 
459 {
460  return p_context.nth(n);
461 }
462 
XBit current_bit
The piece of markup being processed.
Definition: XML_Parser.h:259
XML_API int SourceLineAndChar(InputSource s, int *linenum, int *charnum)
Definition: input.c:118
void track_contents(bool flag)
Definition: XML_Parser.cc:396
const K & key(EST_Litem *ptr, int m=1) const
find key, reference by ptr
Definition: EST_TKVL.cc:201
int subst(EST_String source, size_t(&starts)[EST_Regex_max_subexpressions], size_t(&ends)[EST_Regex_max_subexpressions])
Substitute the result of a match into a string.
Definition: EST_String.cc:465
XML_API void FreeDtd(Dtd dtd)
Definition: dtd.c:118
XML_API void FreeEntity(Entity e)
Definition: dtd.c:250
InputSource source
Definition: rxp.c:24
static EST_String Number(int i, int base=10)
Build string from an integer.
Definition: EST_String.cc:1199
virtual void element_close(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name)
Definition: XML_Parser.cc:201
STD_API FILE16 * MakeFILE16FromFILE(FILE *f, const char *type)
Definition: stdio16.c:670
XML_API InputSource NewInputSource(Entity e, FILE16 *f16)
Definition: input.c:88
virtual void pcdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
Definition: XML_Parser.cc:207
virtual void document_close(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:179
STD_API void SetCloseUnderlying(FILE16 *file, int cu)
Definition: stdio16.c:347
virtual void element_open(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
Definition: XML_Parser.cc:184
A Regular expression class to go with the CSTR EST_String class.
Definition: EST_Regex.h:56
#define EST_Regex_max_subexpressions
Definition: EST_Regex.h:150
void registered_ids(EST_TList< EST_String > &list)
Definition: XML_Parser.cc:53
const char * get_error(XML_Parser &p)
Get the error message for the last error.
Definition: XML_Parser.cc:230
A specialised hash table for when the key is an EST_String.
Definition: EST_THash.h:304
~XML_Parser()
Destructor, may close input if required.
Definition: XML_Parser.cc:269
friend class XML_Parser
Definition: XML_Parser.h:231
XML_API const char8 * EntityDescription(Entity e)
Definition: dtd.c:303
EST_UItem * next()
Definition: EST_UList.h:55
XML_API InputSource EntityOpen(Entity e)
Definition: input.c:68
#define NewExternalEntity(name, pub, sys, nnot, parent)
Definition: dtd.h:204
void register_id(EST_Regex id_pattern, EST_String directory)
Definition: XML_Parser.cc:48
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
Definition: EST_String.cc:1084
void go()
Run the parser.
Definition: XML_Parser.cc:282
InputSource try_and_open(Entity ent)
Definition: XML_Parser.cc:130
XML_API int ParserPush(Parser p, InputSource source)
Definition: xmlparser.c:656
static InputSource open_entity(Entity ent, void *arg)
Definition: XML_Parser.cc:165
#define EST_sys_error
Definition: EST_error.h:108
XML_Parser(XML_Parser_Class &parent, InputSource source, Entity initial_entity, void *data)
Creator used by XML_Parser_Class::make_parser()
Definition: XML_Parser.cc:251
virtual void element(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
Definition: XML_Parser.cc:191
void track_context(bool flag)
Definition: XML_Parser.cc:391
XML_API Parser NewParser(void)
Definition: xmlparser.c:432
const char * get_error()
Get the error message for the last error.
Definition: XML_Parser.cc:403
XML_API void ParserSetFlag(Parser p, ParserFlag flag, int value)
Definition: xmlparser.c:756
NULL
Definition: EST_WFST.cc:55
EST_Litem * head() const
Return First key value pair in list.
Definition: EST_TKVL.h:100
#define EST_error
Definition: EST_error.h:104
f
Definition: EST_item_aux.cc:48
virtual void cdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
Definition: XML_Parser.cc:213
XML_API void FreeParser(Parser p)
Definition: xmlparser.c:488
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
EST_String context(int n)
Definition: XML_Parser.cc:458
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
Definition: EST_THash.cc:167
InputSource open(Entity ent)
Open. Asks the parser class to do the work.
Definition: XML_Parser.cc:277
XML_Parser * make_parser(InputSource source, void *data)
Create a parser for the RXP InputSource.
Definition: XML_Parser.cc:72
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
Definition: EST_TKVL.cc:248
XML_API void ParserSetCallbackArg(Parser p, void *arg)
Definition: xmlparser.c:513
virtual void error(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:225
virtual void processing(XML_Parser_Class &c, XML_Parser &p, void *data, const char *instruction)
Definition: XML_Parser.cc:219
EST_String
STD_API char8 * strdup8(const char8 *s)
Definition: string16.c:77
XML_API XBit ReadXBit(Parser p)
Definition: xmlparser.c:633
EST_String tostring(void) const
Get the expression as a string.
Definition: EST_Regex.h:97
virtual void document_open(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:174
XML_API void ParserSetEntityOpener(Parser p, EntityOpenerProc opener)
Definition: xmlparser.c:528
XML_API void FreeXBit(XBit xbit)
Definition: xmlparser.c:300