54 #if defined(ESTLIBDIRC) 55 # define __STRINGIZE(X) #X 56 # define ESTLIBDIR __STRINGIZE(ESTLIBDIRC) 60 static EST_Regex simpleIDRegex(
"[^#]*#id(\\([-a-z0-9]+\\))");
61 static EST_Regex rangeIDRegex(
"[^#]*#id(\\([a-z]*\\)\\([0-9]*\\)\\(-\\([0-9]+\\)\\)*).*id(\\([a-z]*\\)\\([0-9]*\\)\\(-\\([0-9]+\\)\\)*)");
62 static EST_Regex featureDefRegex(
"\\([^:]*\\):\\(.*\\)");
71 #define MAX_FEATS (50) 75 class GenXML_Parse_State
96 GenXML_Parse_State() : contents(100) {
145 const char *instruction);
160 pclass =
new GenXML_Parser_Class();
162 printf(
"Register estlib in genxml %s\n",
ESTLIBDIR "/\\1.dtd");
165 pclass->register_id(
"//CSTR EST//DTD \\(.*\\)//[A-Z]*",
167 pclass->register_id(
"//CSTR EST//ENTITIES \\(.*\\)//[A-Z]*",
194 (void)print_attributes;
195 GenXML_Parse_State state;
214 static void ensure_relation(GenXML_Parse_State *state,
EST_String name)
216 if (state->rel!=
NULL && name == state->relName)
219 state->rel = state->utt->create_relation(state->relName=name);
229 state->contents.add_item(
id, c);
246 sprintf(buf,
"%s%d", root, ++count);
257 val = attributes.
val(
"id");
258 #if defined(EST_DEBUGGING) 259 fprintf(stderr,
"ID %s\n", (
const char *)val);
263 else if (attributes.
present(
"href"))
265 val = attributes.
val(
"href");
269 if (val.
matches(simpleIDRegex, 0, starts, ends))
272 #if defined(EST_DEBUGGING) 273 fprintf(stderr,
"SIMPLE %s\n", (
const char *)n);
277 else if (val.
matches(rangeIDRegex, 0, starts, ends))
279 EST_String prefix1 = val.
at(starts[1], ends[1]-starts[1]);
280 int n1 = atoi(val.
at(starts[2], ends[2]-starts[2]));
281 EST_String postfix1 = val.
at(starts[4], ends[4]-starts[4]);
282 EST_String prefix2 = val.
at(starts[5], ends[5]-starts[5]);
283 int n2 = atoi(val.
at(starts[6], ends[6]-starts[6]));
284 EST_String postfix2 = val.
at(starts[8], ends[8]-starts[8]);
286 #if defined(EST_DEBUGGING) 287 fprintf(stderr,
"RANGE '%s' %d - '%s' // '%s' %d - '%s'\n",
288 (
const char *)prefix1,
290 (
const char *)postfix1,
291 (
const char *)prefix2,
293 (
const char *)postfix2
297 if (prefix1==prefix2)
304 if (postfix1.length()==0)
306 sprintf(buf,
"%s%s%d",
307 (
const char *)prefix1,
308 (
const char *)prefix2,
317 if (postfix2.length()>0)
318 for (; c<=atoi(postfix2); c++)
320 sprintf(buf,
"%s%s%d-%d",
321 (
const char *)prefix1,
322 (
const char *)prefix2,
331 for(
int i=n1; i<=n2; i++)
334 && postfix2.length()>0)
336 sprintf(buf,
"%s%s%d",
337 (
const char *)prefix1,
338 (
const char *)prefix2,
342 for (
int c=1; c<=atoi(postfix2); c++)
344 sprintf(buf,
"%s%s%d-%d",
345 (
const char *)prefix1,
346 (
const char *)prefix2,
355 if ( postfix1.length()>0)
356 sprintf(buf,
"%s%s%d-%s",
357 (
const char *)prefix1,
358 (
const char *)prefix2,
360 (
const char *)postfix1
363 sprintf(buf,
"%s%s%d",
364 (
const char *)prefix1,
365 (
const char *)prefix2,
377 EST_warning(
"element with bad ID or HREF '%s'", (
const char *)val);
380 ids.
append(make_new_id(
"n"));
391 for(them.
begin(attributes); them ; them++)
393 (
const char *)them->k,
394 (
const char *)them->v);
405 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
408 state->open_depth=-1;
409 state->rel_start_depth=-1;
410 state->depth_stack.clear();
421 (void)c; (void)p; (void)data;
424 static void proccess_features(
EST_String name,
433 size_t n = split(defs, names,
MAX_FEATS, feat_sep);
434 for(
size_t i=0; i<n; i++)
440 if (def.
matches(featureDefRegex, 0, starts, ends))
442 feat = def.
at(starts[1], ends[1]-starts[1]);
443 attr = def.
at(starts[2], ends[2]-starts[2]);
454 printf(
"on %s got %s(%s)=%s\n", name,
470 (void)c; (void)p; (void)attributes; (void)name;
471 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
478 if (state->utt !=
NULL 480 proccess_features(name, val, attributes, state->utt->f);
483 if (state->rel !=
NULL 485 proccess_features(name, val, attributes, state->rel->f);
496 EST_warning(
"%s\nNo feature '%s' to name relation\n", get_error(p), (
const char *)val);
499 EST_String relationType = attributes.
val(
"estRelationTypeAttr");
501 ensure_relation(state, relName);
502 state->rel_start_depth=state->depth;
503 state->linear=(attributes.
val(relationType) ==
"linear"||
504 attributes.
val(relationType) ==
"list");
506 printf(
"start of relation depth=%d name=%s type=%s\n", state->depth, (
const char *)relName, state->linear?
"linear":
"tree");
509 else if ((state->rel_start_depth >= 0 &&
515 printf(
"push depth=%d name=%s ig=%s\n", state->depth, name, (
const char *)ig);
518 ensure_relation(state, val);
520 state->depth_stack.push(state->open_depth);
521 state->open_depth=state->depth;
527 extract_ids(attributes, ids);
550 for(them.
begin(attributes); them ; them++)
557 cont->
f.
set(
"id",
id);
562 if (state->current ==
NULL)
563 item = state->rel->append();
565 item = state->current->insert_after();
566 else if (state->current ==
NULL)
567 if (state->parent ==
NULL)
568 item = state->rel->append();
570 item = state->parent->append_daughter();
572 if (state->parent ==
NULL)
573 item = state->current->insert_after();
575 item = state->parent->append_daughter();
586 bool embed = (attributes.
val(
"estExpansion") ==
"embed");
589 state->id=make_new_id(
"e");
590 element_open(c, p, data, name, attributes);
595 for(; idp!=
NULL; idp = idp->
next())
602 element_close(c, p, data, name);
607 element_open(c, p, data, name, attributes);
612 element_close(c, p, data, name);
618 if (state->parent!=
NULL)
619 state->contentAttr = attributes.
val(
"estContentFeature");
622 printf(
"\t current=%s parent=%s contA=%s\n",
623 (
const char *)state->current->name(),
624 (
const char *)state->parent->name(),
625 (
const char *)state->contentAttr);
641 (void)c; (void)p; (void)attributes;
642 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
645 element_open(c, p, data, name, attributes);
646 element_close(c, p, data, name);
655 (void)c; (void)p; (void)name;
656 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
661 if (state->depth == state->rel_start_depth )
664 printf(
"end of relation depth=%d name=%s\n", state->depth, name);
666 state->rel_start_depth=-1;
670 state->depth == state->open_depth)
673 printf(
"pop depth=%d name=%s\n", state->depth, name);
675 state->current = state->parent;
676 state->parent=
parent(state->parent);
677 state->open_depth = state->depth_stack.pop();
679 printf(
"\t current=%s parent=%s\n",
680 (
const char *)state->current->name(),
681 (
const char *)state->parent->name());
697 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
701 state->parent->set(state->contentAttr, chars);
704 printf(
"GEN XML Parser [pcdata[%s]] %d\n", chars, state->depth);
714 (void)c; (void)p; (void)data; (void)chars;
718 printf(
"GEN XML Parser [cdata[%s]] %d\n", chars, state->depth);
726 const char *instruction)
728 (void)c; (void)p; (void)instruction;
729 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
733 printf(
"GEN XML Parser [proc[%s]] %d\n", instruction, state->depth);
742 (void)c; (void)p; (void)data;
745 EST_error(
"GEN XML Parser %s", get_error(p));
753 #if defined(INSTANTIATE_TEMPLATES) 755 #include "../base_class/EST_THash.cc"
static void registered_ids(EST_StrList &list)
void set_contents(EST_Item_Content *li)
#define END_CATCH_ERRORS()
static EST_read_status read_xml(FILE *file, const EST_String &name, EST_Utterance &u, int &max_id)
void clear()
remove everything in utterance
The file was read in successfully.
A Regular expression class to go with the CSTR EST_String class.
#define EST_Regex_max_subexpressions
void registered_ids(EST_TList< EST_String > &list)
void set(const EST_String &name, int ival)
EST_Item * root(const EST_Item *n)
return root node of treeprevious sibling (sister) of n
EST_Track error(EST_Track &ref, EST_Track &test, int relax=0)
EST_Features f
General features for this item.
void register_id(EST_Regex id_pattern, EST_String directory)
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
InputSource try_and_open(Entity ent)
EST_TKVL< EST_String, EST_Val > relations
#define est_error_throw()
V & val(const K &key, int &found) const
int present(const K &key) const
Does the key have an entry?
#define Instantiate_TStringHash_T(VAL, TAG)
void track_context(bool flag)
static InputSource try_and_open(Entity ent)
The file exists but is not in the format specified.
const T & first() const
return const reference to first item in list
static void register_id(const EST_String pattern, const EST_String result)
int matches(const char *e, ssize_t pos=0) const
Exactly match this string?
void append(const T &item)
add item onto end of list
XML_Parser * make_parser(InputSource source, void *data)
Create a parser for the RXP InputSource.
void begin(const Container &over)
Set the iterator ready to run over this container.
int present(const K &rkey) const
Returns true if key is present.
An open hash table. The number of buckets should be set to allow enough space that there are relative...
virtual void error(XML_Parser_Class &c, XML_Parser &p, void *data)
EST_String at(int from, int len=0) const
Return part at position.
EST_Item * parent(const EST_Item *n)
return parent of n
static const EST_String Empty
Constant empty string.
static void class_init(void)