// File-scope patterns used while reading APML input.
//
// simpleIDRegex: any prefix, then "#id(w<digits>)".
// NOTE(review): presumably "\\(" ... "\\)" delimits a group and a bare "(" / ")"
// is a literal parenthesis in EST_Regex syntax -- confirm against the EST_Regex
// documentation. Not referenced in the visible part of this file.
49 static EST_Regex simpleIDRegex(
".*#id(w\\([0-9]+\\))");
// rangeIDRegex: same shape as simpleIDRegex, followed later in the string by a
// second "id(w<digits>)". Not referenced in the visible part of this file.
50 static EST_Regex rangeIDRegex(
".*#id(w\\([0-9]+\\)).*id(w\\([0-9]+\\))");
// RXpunc: one or more characters from the set . , ? ! and double quote.
// NOTE(review): whether the backslashes inside the brackets are escapes or
// literal set members depends on EST_Regex bracket handling -- confirm.
// Used by the character-data handling below to recognise punctuation-only
// tokens (stored as "punc" on the previous token) and to pick out the
// "prepunctuation" and "punc" parts of a token.
51 static EST_Regex RXpunc(
"[\\.,\\?\\!\"]+");
108 const char *instruction);
118 for(them.
begin(attributes); them ; them++)
120 (
const char *)them->k,
121 (
const char *)them->v);
130 (void)print_attributes;
131 Apml_Parser_Class pclass;
138 XML_Parser *parser = pclass.make_parser(file, name, &state);
161 Parse_State *state = (Parse_State *)data;
168 state->last_token=
NULL;
171 state->perf = state->utt->create_relation(
"Perfomative");
172 state->com = state->utt->create_relation(
"Communicative");
173 state->tokens = state->utt->create_relation(
"Token");
174 state->semstruct = state->utt->create_relation(
"SemStructure");
175 state->emphasis = state->utt->create_relation(
"Emphasis");
176 state->boundary = state->utt->create_relation(
"Boundary");
177 state->pause = state->utt->create_relation(
"Pause");
186 (void)c; (void)p; (void)data;
196 (void)c; (void)p; (void)attributes;
197 Parse_State *state = (Parse_State *)data;
201 if (strcmp(name,
"turnallocation")==0)
207 if (strcmp(name,
"apml")==0)
212 if( strcmp(name,
"performative")==0
213 || strcmp(name,
"rheme")==0
214 || strcmp(name,
"theme")==0
215 || strcmp(name,
"emphasis")==0
216 || strcmp(name,
"boundary")==0
217 || strcmp(name,
"pause")==0)
225 for(them.
begin(attributes); them ; them++)
234 if( strcmp(name,
"emphasis")==0 )
236 item = state->emphasis->append();
237 state->pending = item;
239 else if(strcmp(name,
"boundary")==0 )
241 item = state->boundary->append();
242 if(state->last_token)
245 else if(strcmp(name,
"pause")==0 )
247 item = state->pause->append();
248 if(state->last_token)
253 if (state->parent ==
NULL)
254 item = state->semstruct->append();
256 item = state->parent->append_daughter();
265 EST_warning(
"APML Parser: unknown element %s", name);
275 (void)c; (void)p; (void)attributes;
277 element_open(c, p, data, name, attributes);
278 element_close(c, p, data, name);
287 (void)c; (void)p; (void)name;
288 Parse_State *state = (Parse_State *)data;
290 if ( strcmp(name,
"emphasis")==0
291 || strcmp(name,
"boundary")==0
292 || strcmp(name,
"pause")==0 )
299 if (strcmp(name,
"performative")==0
300 || strcmp(name,
"theme")==0
301 || strcmp(name,
"rheme")==0)
304 state->pending =
NULL;
305 state->parent=state->parent->up();
318 Parse_State *state = (Parse_State *)data;
321 split(chars,strings,255,
RXwhite);
328 while( s < 1 || strings[s].length() > 0 )
330 if(strings[s].length() > 0 )
333 if(strings[s].matches(RXpunc))
335 state->last_token->set(
"punc",strings[s]);
343 if (state->parent ==
NULL)
344 item = state->semstruct->append();
346 item = state->parent->append_daughter();
350 int i = strings[s].
index(RXpunc);
353 if( ps.
length() > 0 && i == 0)
355 cout <<
"Got pre punc: " << ps << endl;
356 intermediate = strings[s].
after(RXpunc);
358 item->
set(
"prepunctuation",ps);
362 intermediate = strings[s];
363 item->
set(
"prepunctuation",
"");
366 ps = intermediate.
at(RXpunc);
369 cout <<
"Got punc: " << ps << endl;
371 item->
set(
"punc",ps);
376 item->
set(
"punc",
"");
379 state->tokens->append(item);
380 state->last_token = item;
384 state->pending->append_daughter(item);
403 (void)c; (void)p; (void)data; (void)chars;
413 const char *instruction)
416 Parse_State *state = (Parse_State *)data;
418 printf(
"APML XML Parser [proc[%s]] %d\n", instruction, state->depth);
426 (void)c; (void)p; (void)data;
429 EST_error(
"APML Parser %s", get_error(p));
EST_read_status apml_read(FILE *file, const EST_String &name, EST_Utterance &u, int &max_id)
void set_contents(EST_Item_Content *li)
#define END_CATCH_ERRORS()
void clear()
remove everything in utterance
The file was read in successfully.
EST_Item * append_daughter(EST_Item *li=0)
void set(const EST_String &name, ssize_t ival)
A Regular expression class to go with the CSTR EST_String class.
void set(const EST_String &name, int ival)
A specialised hash table for when the key is an EST_String.
size_t index(const char *s, ssize_t pos=0) const
Position of substring (starting at pos)
EST_Track error(EST_Track &ref, EST_Track &test, int relax=0)
EST_Features f
General features for this item.
#define est_error_throw()
EST_Regex RXwhite("[ \n\t\r]+")
White space.
void track_context(bool flag)
The file exists but is not in the format specified.
void set_name(const EST_String &s)
set name
size_t length(void) const
Length of string (not the length of the underlying chunk)
void begin(const Container &over)
Set the iterator ready to run over this container.
EST_String after(int pos, int len=1) const
Part after pos+len.
EST_String before(int pos, int len=0) const
Part before position.
EST_String at(int from, int len=0) const
Return part at position.
EST_Item * parent(const EST_Item *n)
return parent of n