Edinburgh Speech Tools  2.1-release
EST_Utterance.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : May 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* EST_Utterance class source file */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <cstdio>
41 #include <iostream>
42 #include <fstream>
43 #include "EST_error.h"
44 #include "EST_string_aux.h"
46 #include "EST_UtteranceFile.h"
47 #include "EST_string_aux.h"
48 
49 using namespace std;
50 
51 const EST_String DEF_FILE_TYPE = "est_ascii";
52 
53 static void clear_up_sisilist(EST_TKVL<EST_Item_Content *,EST_Item *> &s);
54 static EST_Item *map_ling_item(EST_Item *si,
56  EST_Item *> &s);
57 static void copy_relation(EST_Item *to,EST_Item *from,
59 
61 
62 #if defined(INSTANTIATE_TEMPLATES)
63 
64 #include "../base_class/EST_TList.cc"
65 #include "../base_class/EST_TKVL.cc"
66 
68 #endif
69 
71 {
72  init();
73 }
74 
76 {
77  highest_id = 0;
78  f.set("max_id", 0);
79 }
80 
82 {
83  int i = f.val("max_id").Int();
84  f.set("max_id", i+1);
85  return i+1;
86 }
87 
89 {
90  relations.clear();
91 }
92 
94 {
96  for (p.begin(relations); p; p++)
97  ::relation(p->v)->clear();
98 }
99 
101 {
102  EST_Relation *r = relation(n,FALSE);
103  if (r) // there is one already, so clear it
104  r->clear();
105  else
106  {
107  r = new EST_Relation(n);
108  r->set_utt(this);
109  relations.set_val(n,est_val(r));
110  }
111 
112  return r;
113 }
114 
115 static EST_Item *item_id(EST_Item *p, const EST_String &n)
116 {
117  EST_Item *s, *t;
118 
119  t = 0;
120  if ((p == 0) || (p->S("id","0") == n))
121  return p;
122 
123  for (s = daughter1(p); s; s = s->next())
124  {
125  t = item_id(s, n);
126  if (t != 0)
127  return t;
128  }
129 
130  return 0;
131 }
132 
134 {
135  EST_Item *s, *t;
137 
138  for (p.begin(relations); p; p++)
139  for (s = ::relation(p->v)->head(); s; s = next_item(s))
140  if ((t = item_id(s, n)) != 0)
141  return t;
142  EST_error("Could not find item matching id %s\n", (const char *)n);
143  return 0;
144 }
145 
147 {
149 
150  for (p.begin(relations); p; p++)
151  ::relation(p->v)->evaluate_item_features();
152 }
153 
155 {
156  EST_Relation *r = relation(n,FALSE);
157 
158  if (r != 0)
159  relations.remove(n);
160 }
161 
163 {
164  if (err)
165  return ::relation(relations.f(name));
166  else
167  {
168  EST_Relation *r = 0;
169  return ::relation(relations.f(name,est_val(r)));
170  }
171 }
172 
174 {
175  if (!name.contains("("))
176  return relations.present(name);
177  EST_StrList s;
178  BracketStringtoStrList(name, s);
179  return relation_present(s);
180 }
181 
183 {
184  for (EST_Litem *p = names.head(); p ; p = p->next())
185  if (!relations.present(names(p)))
186  return false;
187  return true;
188 }
189 
191 {
192  copy(s);
193  return *this;
194 }
195 
196 ostream& operator << (ostream &st, const EST_Utterance &u)
197 {
198  u.save(st,"est_ascii");
199  return st;
200 }
201 
202 void EST_Utterance::copy(const EST_Utterance &u)
203 {
204  // Make a copy of the utterance
206  EST_Relation *nrel;
207  EST_Item *rnode;
208 
209  clear();
210  f = u.f;
211 
213  for (r.begin(u.relations); r; r++)
214  {
215  EST_Relation *rr = ::relation(r->v);
216  nrel = create_relation(rr->name());
217  nrel->f = rr->f;
218  if (rr->head() != 0)
219  {
220  rnode = nrel->append(map_ling_item(rr->head(),sisilist));
221  copy_relation(rnode,rr->head(),sisilist);
222  }
223  }
224  clear_up_sisilist(sisilist);
225 }
226 
227 static void extra_sub_utterance(EST_Utterance &u,EST_Item *i)
228 {
229  sub_utterance(u,i);
230 }
231 
233 {
234  extra_sub_utterance(*this,i);
235 }
236 
237 static void merge_tree(EST_Relation *urel,
238  EST_Relation *rel,
239  EST_Item *uroot,
240  EST_Item *root,
241  EST_Features &items,
242  EST_String feature)
243 {
244  EST_Item *n=0;
245  merge_features(uroot->features(), root->features());
246  // copy horizontally
247  if (root->next()!= NULL)
248  {
249  EST_Item *old = item(items.f(root->next()->S(feature),est_val(n)));
250  EST_Item *new_root = old?uroot->insert_after(old):uroot->insert_after();
251  merge_tree(urel, rel, new_root, root->next(), items, feature);
252  }
253  // vertically
254  if (root->down()!= NULL)
255  {
256  EST_Item *old = item(items.f(root->down()->S(feature),est_val(n)));
257  EST_Item *new_root = old?uroot->insert_below(old):uroot->insert_below();
258  merge_tree(urel, rel, new_root, root->down(), items, feature);
259  }
260 }
261 
263  EST_Utterance &extra,
264  EST_String feature)
265 {
266  // Global merge. Uses the feature to determine which items correspond.
267 
268  // First build a table of existing contents.
269 
270  EST_Features items;
272  for(ri.begin(utt.relations); ri; ri++)
273  {
274  EST_Relation *rel = relation(ri->v);
275  for(EST_Item *i=rel->head(); i != NULL; i=i->next_item())
276  {
277  EST_String id = i->S(feature);
278  items.set_val(id,est_val(i));
279  }
280  }
281 
283  for(eri.begin(extra.relations); eri; eri++)
284  {
285  EST_Relation *rel = relation(eri->v);
286  EST_String rel_name = rel->name();
287 
288  while (utt.relation_present(rel_name))
289  rel_name += "+";
290 
291  EST_Relation *urel = utt.create_relation(rel_name);
292 
293  if (rel->head() != NULL)
294  {
295  EST_Item *n = 0;
296  EST_Item *old = item(items.f(rel->head()->S(feature),est_val(n)));
297  EST_Item *new_root = old?urel->append(old):urel->append();
298  merge_tree(urel, rel, new_root, rel->head(), items, feature);
299  }
300  }
301 
302  return TRUE;
303 }
304 
306  EST_Utterance &sub_utt,
307  EST_Item *utt_root,
308  EST_Item *sub_root)
309 {
310  // Joins sub_utt to utt at ling_item at, merging the root
311  // of relname in sub_utt with ling_item at. All other relations
312  // in sub_utt get their root's appended (not merged) with the
313  // corresponding relations in utt (and created if necessary).
315  EST_Item *rnode;
316  EST_Relation *nrel;
317 
318  if (utt_root->relation_name() != sub_root->relation_name())
319  EST_error("utterance_merge: items not is same relation");
320 
321  if ((utt_root == 0) || (sub_root == 0))
322  EST_error("utterance_merge: items are null");
323 
324  // merge features but preserve root id
325  EST_String root_id = utt_root->S("id");
326  merge_features(utt_root->features(), sub_root->features());
327  utt_root->set("id", root_id);
328  // in case root item in sub is referenced elsewhere in the structure
329  sisilist.add_item(sub_root->contents(),utt_root);
330  copy_relation(utt_root,sub_root,sisilist);
331 
333  for (r.begin(sub_utt.relations); r; r++)
334  {
335  EST_Relation *rr = ::relation(r->v);
336  if (rr->name() != utt_root->relation_name())
337  {
338  if (!utt.relation_present(rr->name()))
339  nrel = utt.create_relation(rr->name());
340  else
341  nrel = utt.relation(rr->name());
342  if (rr->head() != 0)
343  {
344  EST_Item *nn = map_ling_item(rr->head(),sisilist);
345  rnode = nrel->append(nn);
346  copy_relation(rnode,rr->head(),sisilist);
347  }
348  }
349  }
350  sisilist.remove_item(sub_root->contents());
351  clear_up_sisilist(sisilist);
352  return TRUE;
353 }
354 
355 static void copy_relation(EST_Item *to,EST_Item *from,
357 {
358  // Construct next and down nodes of from, into to, mapping
359  // stream_items through slist
360 
361  if (from->next())
362  copy_relation(to->insert_after(map_ling_item(from->next(),slist)),
363  from->next(),
364  slist);
365  if (from->down())
366  copy_relation(to->insert_below(map_ling_item(from->down(),slist)),
367  from->down(),
368  slist);
369 }
370 
371 static EST_Item *map_ling_item(EST_Item *si,
373 {
374  // If si is already in s return its map otherwise copy
375  // si and add it to the list
376  EST_Item *msi;
377  EST_Item *def = 0;
378 
379  msi = s.val_def(si->contents(),def);
380  if (msi == def)
381  { // First time, so copy it and add to map list
382  msi = new EST_Item(*si);
383  msi->f_remove("id");
384  s.add_item(si->contents(),msi);
385  }
386  return msi;
387 }
388 
389 static void clear_up_sisilist(EST_TKVL<EST_Item_Content *,EST_Item *> &s)
390 {
391  // The EST_Items in the value of this need to be freed, its
392  // contents however will not be freed as they will be referenced
393  // somewhere in the copied utterance
394 
395  for (EST_Litem *r=s.list.head(); r != 0; r=r->next())
396  delete s.list(r).v;
397 
398 }
399 
400 static EST_Item *mapped_parent(EST_Item *i,const EST_String &relname,
402 {
403  EST_Item *p;
404 
405  if ((p=parent(i,relname)) == 0)
406  return 0;
407  else if (s.present(p->contents()))
408  return map_ling_item(p,s)->as_relation(relname);
409  else
410  return 0;
411 }
412 
413 static void sub_utt_copy(EST_Utterance &sub,EST_Item *i,
415 {
416  if (s.present(i->contents()))
417  return;
418  else
419  {
420  EST_Item *np,*d;
421  EST_Litem *r;
422  EST_Item *ni = map_ling_item(i,s);
423  for (r = i->relations().list.head(); r; r = r->next())
424  {
425  EST_String relname = i->relations().list(r).k;
426  if (!sub.relation_present(relname))
427  sub.create_relation(relname)->append(ni);
428  else if ((np=mapped_parent(i,relname,s)) != 0)
429  np->append_daughter(ni);
430  else
431  sub.relation(relname)->append(ni);
432 
433  // Do its daughters
434  for (d = daughter1(i,relname); d ; d=d->next())
435  sub_utt_copy(sub,d,s);
436  }
437  }
438 }
439 
441 {
442  // Extract i and all its relations, and daughters ... to build
443  // a new utterance in sub.
445 
446  sub.clear();
447  sub_utt_copy(sub,i,sisilist);
448 
449  clear_up_sisilist(sisilist);
450 }
451 
453 {
454  EST_TokenStream ts;
456 
457  if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
458  {
459  cerr << "load_utt: can't open utterance input file "
460  << filename << endl;
461  return misc_read_error;
462  }
463 
464  v = load(ts);
465 
466  if (v == read_ok)
467  f.set("filename", filename);
468 
469  ts.close();
470 
471  return v;
472 }
473 
475 {
477  int pos = ts.tell();
478  int max_id=-2, n;
479  int num_formats = EST_UtteranceFile::map.n();
480  init(); // we're committed to reading something so clear utterance
481 
482  if (num_formats <= 0)
483  {
484  EST_error("There is not a single UtteranceFile format declared \
485  in EST source code. This should not happen!");
486  return misc_read_error;
487  }
488 
489  for(n=0; n< num_formats ; n++)
490  {
492 
493  if (t == uff_none)
494  continue;
495 
496  EST_UtteranceFile::Info *info = &(EST_UtteranceFile::map.info(t));
497 
498  if (! info->recognise)
499  continue;
500 
501  EST_UtteranceFile::Load_TokenStream * l_fun = info->load;
502 
503  if (l_fun == NULL)
504  continue;
505 
506  if (ts.seek(pos) != 0) {
507  cerr << "load utterance: read error." << endl;
508  return misc_read_error;
509  }
510 
511  stat = (*l_fun)(ts, *this, max_id);
512 
513  if (stat == read_ok)
514  {
515  // set_file_type(EST_UtteranceFile::map.value(t));
516  break;
517  }
518  }
519 
520  highest_id = max_id;
521  return stat;
522 }
523 
525  const EST_String &type) const
526 {
528  ostream *outf;
529 
530  if (filename == "-")
531  outf = &cout;
532  else
533  outf = new ofstream(filename);
534 
535  if (!(*outf))
536  return write_fail;
537 
538  v = save(*outf,type);
539 
540  if (outf != &cout)
541  delete outf;
542 
543  return v;
544 }
545 
547  const EST_String &type) const
548 {
549  EST_String save_type = (type == "") ? DEF_FILE_TYPE : type;
550 
552 
553  if (t == uff_none)
554  {
555  cerr << "Utterance: unknown filetype in saving " << save_type << endl;
556  return write_fail;
557  }
558 
560 
561  if (s_fun == NULL)
562  {
563  cerr << "Can't save utterances to files type " << save_type << endl;
564  return write_fail;
565  }
566 
567  return (*s_fun)(outf, *this);
568 }
569 
571  EST_String &flat_repr )
572 {
573  EST_Item *phrase = utt.relation("Phrase")->head();
574  for( ; phrase; phrase=phrase->next() ){
575  flat_repr += "<";
576 
577  EST_Item *word = daughter1(phrase);
578  for( ; word; word=word->next() ){
579  flat_repr += "{";
580 
581  EST_Item *syllable = daughter1(word, "SylStructure");
582  for( ; syllable; syllable=syllable->next() ){
583  flat_repr += EST_String::cat( "(", syllable->S("stress") );
584 
585  EST_Item *phone = daughter1(syllable);
586  for( ; phone; phone=phone->next() )
587  flat_repr += EST_String::cat( " ", phone->S("name"), " " );
588  flat_repr += ")";
589  }
590  flat_repr += "}";
591  }
592  flat_repr += EST_String::cat( "> _", phrase->S("name"), " " );
593  }
594 }
EST_Item * insert_below(EST_Item *li=0)
Definition: EST_Item.cc:280
EST_Item * head() const
Definition: EST_Relation.h:121
void clear_relations()
clear the contents of the relations only
EST_Item * next_item(const EST_Item *node)
Definition: EST_Item.h:427
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
Definition: EST_String.h:365
EST_write_status save(const EST_String &filename, const EST_String &type="est_ascii") const
EST_read_status load(const EST_String &filename)
EST_write_status
EST_Utterance & operator=(const EST_Utterance &s)
void clear()
remove everything in utterance
The file was read in successfully.
EST_Item * append_daughter(EST_Item *li=0)
Definition: EST_Item.cc:425
const EST_String DEF_FILE_TYPE
EST_Relation * create_relation(const EST_String &relname)
create a new relation called n.
void set_val(const EST_String &name, const EST_Val &sval)
Definition: EST_Features.h:217
void set(const EST_String &name, ssize_t ival)
Definition: EST_Item.h:185
EST_Val est_val(const EST_Item_featfunc f)
Definition: item_feats.cc:122
bool save(Lattice &lattice, EST_String filename)
void BracketStringtoStrList(EST_String s, EST_StrList &l, EST_String sep)
Convert a EST_String enclosed in a single set of brackets to a EST_StrList by separating tokens in s ...
void close(void)
Close stream.
Definition: EST_Token.cc:419
EST_Item * next_item() const
Definition: EST_Item.cc:368
EST_FilePos tell(void) const
tell, synonym for filepos
Definition: EST_Token.h:369
bool load(Lattice &lattice, EST_String filename)
ostream & operator<<(ostream &st, const EST_Utterance &u)
EST_Features f
Utterance level features.
void evaluate_all_features()
Evaluate all feature functions in utterance.
EST_read_status Load_TokenStream(LoadUtterance_TokenStreamArgs)
EST_Item * root(const EST_Item *n)
return root node of treeprevious sibling (sister) of n
EST_Features relations
The list of named relations.
void copy_relation(const EST_Relation &from, EST_Relation &to)
EST_Item * as_relation(const char *relname) const
View item from another relation (const char *) method.
Definition: EST_Item.h:316
EST_UItem * next()
Definition: EST_UList.h:55
ENUM token(VAL value) const
int open(const EST_String &filename)
open a EST_TokenStream for a file.
Definition: EST_Token.cc:213
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
Definition: EST_String.cc:1084
static EST_TNamedEnumI< EST_UtteranceFileType, Info > map
void set_utt(EST_Utterance *u)
Definition: EST_Relation.h:115
An error occurred while reading.
void err(const char *message, LISP x) EST_NORETURN
Definition: slib.cc:608
const EST_Val & f(const EST_String &path) const
Definition: EST_Features.h:115
EST_Utterance()
default constructor
const V & val_def(const K &rkey, const V &def) const
value or default
Definition: EST_TKVL.cc:151
EST_Item * id(const EST_String &n) const
return EST_Item whose id is n.
const EST_String & name() const
Definition: EST_Relation.h:118
EST_FMatrix sub(const EST_FMatrix &a, ssize_t row, ssize_t col)
Definition: vec_mat_aux.cc:187
const EST_String & relation_name() const
The relation name of this particular item.
Definition: EST_Item.cc:196
EST_TKVL< EST_String, EST_Val > & relations()
Access to the relation links.
Definition: EST_Item.h:324
int n(void) const
EST_TList< EST_TKVI< K, V > > list
Linked list of key-val pairs. Don't use this as it will be made private in the future.
Definition: EST_TKVL.h:94
EST_Item_Content * contents() const
Definition: EST_Item.h:244
#define FALSE
Definition: EST_bool.h:119
Declare_KVL_T(EST_Item_Content *, EST_Item *, KVL_ICP_IP) EST_Utterance
int remove_item(const K &rkey, int quiet=0)
remove key and val pair from list
Definition: EST_TKVL.cc:263
#define misc_read_error
EST_TVector< T > & copy(EST_TVector< T > a, const EST_TList< T > &in)
NULL
Definition: EST_WFST.cc:55
Templated Key-Value list. Objects of type EST_TKVL contain lists which are accessed by a key of type ...
Definition: EST_TKVL.h:73
#define EST_error
Definition: EST_error.h:104
EST_Item * insert_after(EST_Item *li=0)
Definition: EST_Item.cc:236
f
Definition: EST_item_aux.cc:48
EST_Item * down() const
Definition: EST_Item.h:352
EST_Features & features() const
Definition: EST_Item.h:258
The file was not written successfully.
Save_TokenStream * save
int utterance_merge(EST_Utterance &utt, EST_Utterance &extra, EST_String feature)
EST_Item * next() const
Definition: EST_Item.h:348
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
Definition: EST_TKVL.cc:248
void sub_utterance(EST_Utterance &sub, EST_Item *i)
EST_read_status
INFO & info(ENUM token) const
EST_Features f
Definition: EST_Relation.h:101
void init()
initialise utterance
void sub_utterance(EST_Item *i)
EST_UtteranceFileType
void begin(const Container &over)
Set the iterator ready to run over this container.
#define Instantiate_KVL_T(KEY, VAL, TAG)
Definition: EST_TKVLI.h:52
#define format_ok
int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
int next_id()
return the id of the next item
EST_UItem * head() const
Definition: EST_UList.h:97
const EST_String S(const EST_String &name) const
Definition: EST_Item.h:144
EST_Relation * relation(const char *name, int err_on_not_found=1) const
get relation by name
EST_Item * append(EST_Item *si)
Definition: EST_Relation.cc:88
bool relation_present(const EST_String name) const
void utt_2_flat_repr(const EST_Utterance &utt, EST_String &flat_repr)
void f_remove(const EST_String &name)
Definition: EST_Item.h:228
#define TRUE
Definition: EST_bool.h:118
EST_Item * parent(const EST_Item *n)
return parent of n
int seek(int position)
seek, reposition file pointer
Definition: EST_Token.cc:318
EST_Item * daughter1(const EST_Item *n)
return first daughter of n
void remove_relation(const EST_String &relname)
remove the relation called n.
void merge_features(EST_Features &to, EST_Features &from)
EST_write_status Save_TokenStream(SaveUtterance_TokenStreamArgs)
Utility EST_String Functions header file.