Edinburgh Speech Tools  2.1-release
solexml.cc
Go to the documentation of this file.
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /*************************************************************************/
33  /* */
34  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35  /* -------------------------------------------------------------------- */
36  /* Code to read SOLE format XML as utterances. */
37  /* */
38  /*************************************************************************/
39 
40 #include <cstdlib>
41 #include <cstdio>
42 #include "EST_THash.h"
43 #include "EST_error.h"
44 #include "solexml.h"
45 #include "rxp/XML_Parser.h"
46 
// Pattern for an href that names a single word. extract_ids() reads
// subexpression 1 of a match as the numeric part of the word ID.
// NOTE(review): presumably \\( \\) delimit the captured group and the bare
// parentheses are literal characters -- confirm against EST_Regex syntax.
47 static EST_Regex simpleIDRegex(".*#id(w\\([0-9]+\\))");
// Pattern for an href that names a range of words. extract_ids() reads
// subexpressions 1 and 2 of a match as the first and last word numbers.
48 static EST_Regex rangeIDRegex(".*#id(w\\([0-9]+\\)).*id(w\\([0-9]+\\))");
49 
/** State handed to every Sole_Parser_Class callback through its
  * `void *data' argument while one document is being parsed.
  *
  * NOTE(review): the embedded listing numbers skip 57 and 60. The
  * constructor and other functions in this file use members `parent'
  * and `contents' whose declarations are not visible here -- confirm
  * against the real source file.
  */
50 class Parse_State
51  {
52 public:
    // Element counter: set to 1 in document_open, incremented in
    // element_open, decremented in element_close, printed by processing.
53  int depth;
    // Value of the "relation" attribute of the solexml element.
54  EST_String relName;
    // Utterance being filled in; set by solexml_read.
55  EST_Utterance *utt;
    // Relation that items are added to; created on demand by ensure_relation.
56  EST_Relation *rel;
    // Item that element_open inserts the next item after. element_close
    // sets it to the item just closed; element_open resets it to NULL
    // after adding an item.
58  EST_Item *current;
59 
61 
    // Start with every pointer null and a depth of zero. `contents' is
    // constructed with the argument 100.
62  Parse_State() : contents(100) {
63  depth = 0;
64  utt = 0;
65  rel = 0;
66  parent = 0;
67  current = 0;
68  }
69  };
70 
71 class Sole_Parser_Class : public XML_Parser_Class
72 {
73 protected:
74  virtual void document_open(XML_Parser_Class &c,
75  XML_Parser &p,
76  void *data);
77  virtual void document_close(XML_Parser_Class &c,
78  XML_Parser &p,
79  void *data);
80 
81  virtual void element_open(XML_Parser_Class &c,
82  XML_Parser &p,
83  void *data,
84  const char *name,
85  XML_Attribute_List &attributes);
86  virtual void element(XML_Parser_Class &c,
87  XML_Parser &p,
88  void *data,
89  const char *name,
90  XML_Attribute_List &attributes);
91  virtual void element_close(XML_Parser_Class &c,
92  XML_Parser &p,
93  void *data,
94  const char *name);
95 
96  virtual void pcdata(XML_Parser_Class &c,
97  XML_Parser &p,
98  void *data,
99  const char *chars);
100  virtual void cdata(XML_Parser_Class &c,
101  XML_Parser &p,
102  void *data,
103  const char *chars);
104 
105  virtual void processing(XML_Parser_Class &c,
106  XML_Parser &p,
107  void *data,
108  const char *instruction);
109  virtual void error(XML_Parser_Class &c,
110  XML_Parser &p,
111  void *data);
112 };
113 
/** Debugging helper: print every attribute to stdout as ` key='value''.
  *
  * NOTE(review): the embedded listing numbers skip 116, where the
  * iterator `them' used below is presumably declared -- confirm against
  * the real source file.
  */
114 static void print_attributes(XML_Attribute_List &attributes)
115 {
117 
    // Walk the attribute list, printing the key and value of each entry.
118  for(them.begin(attributes); them ; them++)
119  printf(" %s='%s'",
120  (const char *)them->k,
121  (const char *)them->v);
122 }
123 
/* Body of the SOLE XML reader. It clears the utterance `u', parses the
 * input with a Sole_Parser_Class and returns read_ok, or
 * read_format_error from the statement guarded by CATCH_ERRORS().
 *
 * NOTE(review): the embedded listing numbers skip 124 (the first line of
 * this function's signature) and 146 -- confirm both against the real
 * source file.
 */
125  const EST_String &name,
126  EST_Utterance &u,
127  int &max_id)
128 {
     // max_id is not used by this reader.
129  (void)max_id;
130  (void)print_attributes; // just to shut -Wall up.
131  Sole_Parser_Class pclass;
132  Parse_State state;
133 
     // Start from an empty utterance and let the callbacks reach it via state.
134  u.clear();
135 
136  state.utt=&u;
137 
     // NOTE(review): no delete of `parser' is visible in this listing --
     // confirm who owns the object returned by make_parser().
138  XML_Parser *parser = pclass.make_parser(file, name, &state);
     // pcdata() calls p.context(0), so context tracking is switched on here.
139  parser->track_context(TRUE);
140 
     // NOTE(review): presumably the return below runs only when an error is
     // thrown while parsing -- confirm against the macro in EST_error.h.
141  CATCH_ERRORS()
142  return read_format_error;
143 
144  parser->go();
145 
147 
148  return read_ok;
149 }
150 
151 static void ensure_relation(Parse_State *state)
152 {
153  if (state->rel==NULL)
154  {
155  state->rel = state->utt->create_relation(state->relName);
156  }
157 }
158 
159 static EST_Item_Content *get_contents(Parse_State *state, EST_String id)
160 {
161  EST_Item_Content *c = state->contents.val(id);
162  if (c==NULL)
163  {
164  c = new EST_Item_Content();
165  state->contents.add_item(id, c);
166  }
167 
168  return c;
169 }
170 
/** Work out the ID(s) an element refers to and append them to `ids'.
  *
  *  - An "id" attribute is appended unchanged.
  *  - Otherwise an "href" attribute is matched against simpleIDRegex
  *    (one "w<number>" ID) and then rangeIDRegex (every "w<number>" from
  *    the first number to the last, inclusive). If neither matches, a
  *    warning is issued and nothing is appended.
  *  - With neither attribute, a generated "n<counter>" ID is appended.
  *
  * NOTE(review): the embedded listing numbers skip 172, where the second
  * parameter `ids' is presumably declared -- confirm against the real
  * source file.
  */
171 static void extract_ids(XML_Attribute_List &attributes,
173 {
174  EST_String val;
     // Counter for generated IDs; static, so it persists across calls.
175  static int count;
176  if (attributes.present("id"))
177  {
178  val = attributes.val("id");
179  ids.append(val);
180  }
181  else if (attributes.present("href"))
182  {
183  val = attributes.val("href");
184  size_t starts[EST_Regex_max_subexpressions];
185  size_t ends[EST_Regex_max_subexpressions];
186 
187  if (val.matches(simpleIDRegex, 0, starts, ends))
188  {
     // Subexpression 1 holds the digits; the "w" prefix is put back on.
189  EST_String n = val.at(starts[1], ends[1]-starts[1]);
190 
191  ids.append("w" + n);
192  }
193  else if (val.matches(rangeIDRegex, 0, starts, ends))
194  {
     // Subexpressions 1 and 2 hold the first and last word numbers.
195  int n1 = atoi(val.at(starts[1], ends[1]-starts[1]));
196  int n2 = atoi(val.at(starts[2], ends[2]-starts[2]));
197 
     // Append one ID per word in the inclusive range n1..n2.
198  for(int i=n1; i<=n2; i++)
199  {
200  char buf[100];
201  sprintf(buf, "w%d", i);
202 
203  ids.append(buf);
204  }
205 
206  }
207  else
208  EST_warning("element with bad ID or HREF '%s'", (const char *)val);
209  }
210  else
211  {
     // No id and no href: generate "n1", "n2", ... from the static counter.
212  char buf[100];
213  sprintf(buf, "n%d", ++count);
214 
215  ids.append(buf);
216  return;
217  }
218 
219 }
220 
221 
222 /** Now we define the callbacks.
223  */
224 
225 void Sole_Parser_Class::document_open(XML_Parser_Class &c,
226  XML_Parser &p,
227  void *data)
228 {
229  (void)c; (void)p;
230  Parse_State *state = (Parse_State *)data;
231 
232  state->depth=1;
233  state->rel=NULL;
234  state->parent=NULL;
235  state->current=NULL;
236 }
237 
238 void Sole_Parser_Class::document_close(XML_Parser_Class &c,
239  XML_Parser &p,
240  void *data)
241 {
242  (void)c; (void)p; (void)data;
243 }
244 
245 
/** Callback for an element's opening tag.
  *
  *  - "solexml" records the relation name and returns.
  *  - "text-elem" is ignored.
  *  - "anaphora-elem", "wordlist" and "w" add one item per ID produced
  *    by extract_ids() to the relation, copying every attribute onto the
  *    item's contents.
  *  - Anything else produces a warning.
  *
  * NOTE(review): the embedded listing numbers skip 275 and 299, where
  * `ids' and the iterator `them' are presumably declared -- confirm
  * against the real source file.
  */
246 void Sole_Parser_Class::element_open(XML_Parser_Class &c,
247  XML_Parser &p,
248  void *data,
249  const char *name,
250  XML_Attribute_List &attributes)
251 {
     // NOTE(review): c, p and attributes are all used below despite the casts.
252  (void)c; (void)p; (void)attributes;
253  Parse_State *state = (Parse_State *)data;
254 
     // Counted for every element, whatever its name.
255  state->depth++;
256 
257  if (strcmp(name, "solexml")==0)
258  {
259  state->relName=attributes.val("relation");
260  printf("start solexml relation=%s\n", (const char *)state->relName);
261  return;
262  }
263  else if (strcmp(name, "text-elem")==0)
264  {
265  // ignore these
266  return;
267  }
268 
     // Everything past this point may add items, so the relation must exist.
269  ensure_relation(state);
270 
271  if (strcmp(name, "anaphora-elem")==0
272  || strcmp(name, "wordlist")==0
273  || strcmp(name, "w")==0)
274  {
276  extract_ids(attributes, ids);
277 
278  EST_Litem *idp = ids.head();
279  bool first=TRUE;
280  for(; idp!= NULL; idp = idp->next())
281  {
282  EST_String id = ids(idp);
     // NOTE(review): processing continues after this call unless error()
     // does not return -- confirm.
283  if (id==EST_String::Empty)
284  XML_Parser_Class::error(c, p, data, EST_String("Element With No Id"));
285 
     // For the second and later IDs, step back up one level first (the
     // same update element_close performs) so the next item is inserted
     // after the one just added rather than below it.
286  if (first)
287  first=FALSE;
288  else
289  {
290  state->current = state->parent;
291  state->parent=state->parent->up();
292  }
293 
294 
     // Items sharing an ID share a single contents object.
295  EST_Item_Content *cont = get_contents(state, id);
296 
297  cont->set_name(id);
298 
     // Copy every XML attribute onto the contents as a feature.
300  for(them.begin(attributes); them ; them++)
301  {
302  EST_String k = them->k;
303  EST_String v = them->v;
304  cont->f.set(k,v);
305  }
306 
307  EST_Item *item;
308 
     // Placement: after `current' when it is set; otherwise below `parent';
     // otherwise appended to the relation.
309  if (state->current == NULL)
310  if (state->parent == NULL)
311  item = state->rel->append();
312  else
313  item = state->parent->insert_below();
314  else
315  item = state->current->insert_after();
316 
317  item->set_contents(cont);
318 
     // The new item becomes the parent for anything nested inside it.
319  state->current=NULL;
320  state->parent=item;
321  }
322  }
323  else
324  EST_warning("SOLE XML Parser: unknown element %s", name);
325 }
326 
327 
328 void Sole_Parser_Class::element(XML_Parser_Class &c,
329  XML_Parser &p,
330  void *data,
331  const char *name,
332  XML_Attribute_List &attributes)
333 {
334  (void)c; (void)p; (void)attributes;
335  Parse_State *state = (Parse_State *)data;
336 
337  if (strcmp(name, "language")==0)
338  {
339  state->utt->f.set("language", attributes.val("name"));
340  return;
341  }
342 
343  element_open(c, p, data, name, attributes);
344  element_close(c, p, data, name);
345 }
346 
347 
348 void Sole_Parser_Class::element_close(XML_Parser_Class &c,
349  XML_Parser &p,
350  void *data,
351  const char *name)
352 {
353  (void)c; (void)p; (void)name;
354  Parse_State *state = (Parse_State *)data;
355 
356  if (strcmp(name, "anaphora-elem")==0
357  || strcmp(name, "wordlist")==0
358  || strcmp(name, "w")==0)
359  {
360  state->depth--;
361  state->current = state->parent;
362  state->parent=state->parent->up();;
363  }
364 }
365 
366 
367 void Sole_Parser_Class::pcdata(XML_Parser_Class &c,
368  XML_Parser &p,
369  void *data,
370  const char *chars)
371 {
372  (void)c;
373 
374  Parse_State *state = (Parse_State *)data;
375 
376  if (state->parent != NULL && p.context(0) == "w")
377  state->parent->set(EST_String("word"), chars);
378 
379  // printf("SOLE XML Parser [pcdata[%s]] %d\n", chars, state->depth);
380 }
381 
382 
383 void Sole_Parser_Class::cdata(XML_Parser_Class &c,
384  XML_Parser &p,
385  void *data,
386  const char *chars)
387 {
388  (void)c; (void)p; (void)data; (void)chars;
389  // Parse_State *state = (Parse_State *)data;
390 
391  // printf("SOLE XML Parser [cdata[%s]] %d\n", chars, state->depth);
392 }
393 
394 
395 void Sole_Parser_Class::processing(XML_Parser_Class &c,
396  XML_Parser &p,
397  void *data,
398  const char *instruction)
399 {
400  (void)c; (void)p;
401  Parse_State *state = (Parse_State *)data;
402 
403  printf("SOLE XML Parser [proc[%s]] %d\n", instruction, state->depth);
404 }
405 
406 
/* Parse error callback body: report the parser's error text through
 * EST_error and then invoke est_error_throw().
 *
 * NOTE(review): the embedded listing numbers skip 407, the first line of
 * this function's signature -- confirm against the real source file.
 */
408  XML_Parser &p,
409  void *data)
410 {
     // NOTE(review): p is used below despite the cast.
411  (void)c; (void)p; (void)data;
412  // Parse_State *state = (Parse_State *)data;
413 
414  EST_error("SOLE XML Parser %s", get_error(p));
415 
     // NOTE(review): presumably this hands control to the CATCH_ERRORS()
     // handler in solexml_read -- confirm against EST_error.h.
416  est_error_throw();
417 }
EST_read_status solexml_read(FILE *file, const EST_String &name, EST_Utterance &u, int &max_id)
Definition: solexml.cc:124
void set_contents(EST_Item_Content *li)
Definition: EST_Item.cc:202
#define END_CATCH_ERRORS()
Definition: EST_error.h:144
void clear()
remove everything in utterance
The file was read in successfully.
A Regular expression class to go with the CSTR EST_String class.
Definition: EST_Regex.h:56
#define EST_Regex_max_subexpressions
Definition: EST_Regex.h:150
void set(const EST_String &name, int ival)
Definition: EST_Features.h:186
A specialised hash table for when the key is an EST_String.
Definition: EST_THash.h:304
EST_UItem * next()
Definition: EST_UList.h:55
EST_Track error(EST_Track &ref, EST_Track &test, int relax=0)
EST_Features f
General features for this item.
void go()
Run the parser.
Definition: XML_Parser.cc:282
#define est_error_throw()
Definition: EST_error.h:112
V & val(const K &key, int &found) const
Definition: EST_THash.cc:114
int present(const K &key) const
Does the key have an entry?
Definition: EST_THash.cc:96
void track_context(bool flag)
Definition: XML_Parser.cc:391
#define FALSE
Definition: EST_bool.h:119
#define CATCH_ERRORS()
Definition: EST_error.h:126
The file exists but is not in the format specified.
NULL
Definition: EST_WFST.cc:55
#define EST_error
Definition: EST_error.h:104
void set_name(const EST_String &s)
set name
int matches(const char *e, ssize_t pos=0) const
Exactly match this string?
Definition: EST_String.cc:651
EST_String context(int n)
Definition: XML_Parser.cc:458
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
EST_read_status
void begin(const Container &over)
Set the iterator ready to run over this container.
#define EST_warning
Definition: EST_error.h:106
EST_UItem * head() const
Definition: EST_UList.h:97
virtual void error(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:225
EST_String
EST_String at(int from, int len=0) const
Return part at position.
Definition: EST_String.h:292
#define TRUE
Definition: EST_bool.h:118
EST_Item * parent(const EST_Item *n)
return parent of n
static const EST_String Empty
Constant empty string.
Definition: EST_String.h:110