Edinburgh Speech Tools  2.1-release
relation_io.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor updated by awb */
34 /* Date : Feb 1999 */
35 /*-----------------------------------------------------------------------*/
36 /* Relation class file i/o, label files */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <cstdio>
41 #include <fstream>
42 #include "EST_unix.h"
43 #include "EST_types.h"
45 #include "EST_string_aux.h"
46 #include "EST_cutils.h"
47 #include "EST_TList.h"
48 #include "EST_Option.h"
49 #include "relation_io.h"
50 
51 using namespace std;
52 
53 #define DEF_SAMPLE_RATE 16000
54 #define HTK_UNITS_PER_SECOND 10000000
55 
56 static EST_Regex RXleadingwhitespace("^[ \t\n\r][ \t\n\r]*.*$");
57 
59  int sample);
60 
62 {
63  ts.set_SingleCharSymbols(";");
64  ts.set_quotes('"','\\');
65  EST_String key, val;
66 
67  // Skip the header
68  while (!ts.eof())
69  {
70  key = ts.get().string();
71  if (key == "#")
72  break;
73 
74  val = ts.get_upto_eoln().string();
75  // delete leading whitespace
76  if (val.matches(RXleadingwhitespace))
77  val = val.after(RXwhite);
78  rel.f.set(key, val);
79  }
80 
81  if (ts.peek() == "") return format_ok;
82 
83  while (!ts.eof())
84  {
85  EST_Item *si = rel.append();
86  EST_String name;
87 
88  si->set("end",(float)atof(ts.get().string()));
89  ts.get(); // skip the color;
90 
91  for (name = ""; (!ts.eoln()) && (ts.peek() != ";"); )
92  {
93  EST_Token &t = ts.get();
94  if (name.length() > 0) // preserve internal whitespace
95  name += t.whitespace();
96  name += t.string();
97  }
98  si->set_name(name);
99 
100  if (ts.peek().string() == ";") // absorb separator
101  {
102  ts.get();
103  si->features().load(ts);
104  }
105  }
106  return format_ok;
107 }
108 
110  const EST_Relation &s,
111  bool evaluate_ff)
112 {
113  ostream *outf;
114  if (filename == "-")
115  outf = &cout;
116  else
117  outf = new ofstream(filename);
118 
119  if (!(*outf))
120  {
121  cerr << "save_esps_label: can't open label output file \"" <<
122  filename << "\"" << endl;
123  return write_fail;
124  }
125 
126  EST_write_status st=save_esps_label(outf, s, evaluate_ff);
127 
128  if (outf != &cout)
129  delete outf;
130 
131  return st;
132 }
133 
135  const EST_Relation &s,
136  bool evaluate_ff)
137 {
138  EST_Item *ptr;
139 
140  *outf << "separator ;\n";
141  if (!s.f.present("nfields"))
142  *outf << "nfields 1\n";
143 
145  for (p.begin(s.f); p; ++p)
146  *outf << p->k << " " << p->v << endl;
147 
148  *outf << "#\n";
149 /* if (f("timing_style") == "event")
150  *outf << "timing_style event\n";
151  else if (f("timing_style") == "unit")
152  *outf << "timing_style unit\n";
153 */
154 
155  for (ptr = s.head(); ptr != 0; ptr = ptr->next())
156  {
157  *outf << "\t";
158  std::streamsize oldprecision = outf->precision(5);
159  std::ios_base::fmtflags oldsetf = outf->setf(ios::fixed, ios::floatfield);
160  std::streamsize oldwidth = outf->width(8);
161  // outf->fill('0');
162  if (s.f("timing_style","0") == "event")
163  *outf << ptr->F("time",0);
164  else
165  *outf << ptr->F("end",0);
166 
167  *outf << " 26 \t" << ptr->S("name","0");
168 
169  EST_Features f2;
170  f2 = ptr->features();
171  f2.remove("name");
172  f2.remove("end");
173  if (evaluate_ff)
174  evaluate(ptr,f2);
175 
176  if (f2.length() > 0)
177  {
178  *outf << " ; ";
179  f2.save(*outf);
180  }
181  *outf << endl;
182 
183  outf->precision(oldprecision);
184  outf->setf(oldsetf);
185  outf->width(oldwidth);
186 
187  }
188 
189  return write_ok;
190 }
191 
193 {
194  // This function reads OGI style label files. The start, end
195  // time and names of the labels are mandatory.
196  EST_String key, val;
197  float sr;
198  int isr;
199 
200  // set up the character constant values for this stream
201  ts.set_SingleCharSymbols(";");
202 
203  // Skip over header
204 
205  while(!ts.eof())
206  {
207  if ((ts.peek().col() == 0) && (ts.peek() == "END"))
208  {
209  if (ts.peek() == "END")
210  { // read rest of header
211  ts.get();
212  ts.get();
213  ts.get();
214  }
215  break;
216  }
217  key = ts.get().string();
218  val = ts.get().string();
219  }
220 
221  sr = 1000.0 / atof(val);
222  isr = (int)sr;
223 
224  if (ts.eof())
225  {
226  cerr << "Error: couldn't find header in label file "
227  << ts.filename() << endl;
228  return wrong_format;
229  }
230 
231  if (read_label_portion(ts, s, isr) == misc_read_error)
232  {
233  cerr << "error: in label file " << ts.filename() << " at line " <<
234  ts.linenum() << endl;
235  return misc_read_error;
236  }
237  return format_ok;
238 }
239 
241 {
242  // This function reads label files in the form of simple word strings
243  // with no timing information.
244  EST_Item *item;
245 
246  while (!ts.eof())
247  {
248  item = s.append();
249  item->set("name",(EST_String)ts.get());
250  item->set("end",0.0);
251  }
252 
253  return format_ok;
254 }
255 
// Convert a time written as a count of units into seconds.
//
// s       number as text; leading whitespace is skipped
// sample  number of units per second (the divisor)
//
// Label files that count in 100 nanosecond units produce numbers far
// beyond the range of an int, so the text is parsed straight into a
// double.  strtod() is used for strings of any length, so long strings
// with a sign or a fractional part are handled the same way as short ones.
static float convert_long_num_string_to_time(const char *s,int sample)
{
    return static_cast<float>(strtod(s, NULL) / sample);
}
283 
285  int sample)
286 {
287  EST_Item *item;
288  /*float hstart;*/
289  float hend;
290  EST_String str;
291 
292  while(!ts.eof())
293  {
294  str = ts.get().string();
295  if (str == ".")
296  return format_ok;
297 
298  item = s.append();
299 
300  /*hstart = convert_long_num_string_to_time(str,sample);*/
301  str = ts.get().string();
302  hend = convert_long_num_string_to_time(str,sample);
303 
304  item->set("end",hend); // time
305  item->set("name",ts.get().string()); // name
306 
307  if (!ts.eoln())
308  item->set("rest_lab",ts.get_upto_eoln().string());
309  }
310 
311  return format_ok;
312 }
313 
315  EST_Relation &s, int sample)
316 {
317 
318  if (sample == 0) // maybe this should be an error
319  sample = DEF_SAMPLE_RATE;
320 
321  // set up the character constant values for this stream
322  ts.set_SingleCharSymbols(";");
323 
324  s.clear();
325  if (read_label_portion(ts, s, sample) == misc_read_error)
326  {
327  cerr << "error: in label file " << ts.filename() << " at line " <<
328  ts.linenum() << endl;
329  return misc_read_error;
330  }
331  return format_ok;
332 }
333 
335  const EST_Relation &a)
336 {
337  ostream *outf;
338  if (filename == "-")
339  outf = &cout;
340  else
341  outf = new ofstream(filename);
342 
343  if (!(*outf))
344  {
345  cerr << "save_htk_label: can't open label output file \"" <<
346  filename << "\"" << endl;
347  return write_fail;
348  }
349 
350  EST_write_status s = save_htk_label(outf, a);
351 
352 
353  if (outf != &cout)
354  delete outf;
355 
356  return s;
357 }
358 
360  const EST_Relation &a)
361 {
362  EST_Item *ptr;
363  float end,start;
364 
365  outf->precision(6);
366 
367  start = end = 0;
368  for (ptr = a.head(); ptr != 0; ptr = ptr->next())
369  {
370  outf->width(15);
371  cout.setf(ios::left,ios::adjustfield);
372  *outf << (int)(start * HTK_UNITS_PER_SECOND);
373  outf->width(15);
374  end = ptr->F("end",0.0);
375  *outf << (int)(end * HTK_UNITS_PER_SECOND);
376  *outf << " " << ptr->name() << endl;
377  start = end;
378  }
379 
380  return write_ok;
381 }
382 
383 #if 0
// Disabled code: this function sits inside the "#if 0" region and is not
// compiled.
//
// Writes one line per item of the relation: the item name left justified
// in a field of width 8, followed by dur() * 1000.0 in fixed notation
// with precision 3.  The first line written carries the suffix
// "\t (0,140)" and the last one "\t (99,80)".
//
// filename  output file name, or "-" for standard output
// a         relation to write
//
// Returns write_fail if the output stream is not usable, else write_ok.
//
// NOTE(review): a.head() is dereferenced without a null check, and the
// stream allocated with new is not deleted on the write_fail path.
384 EST_write_status save_label_spn(const EST_String &filename,
385  const EST_Relation &a)
386 {
387  EST_Stream_Item *ptr;
388 
389  ostream *outf;
390  if (filename == "-")
391  outf = &cout;
392  else
393  outf = new ofstream(filename);
394 
395  if (!(*outf))
396  {
397  cerr << "save_label_spn: can't open label output file \""
398  << filename << "\"" << endl;
399  return write_fail;
400  }
401 
// First line: head item, with the "(0,140)" suffix
402  ptr = a.head();
403  outf->precision(3);
404  outf->setf(ios::left, ios::adjustfield);
405  outf->width(8);
406  *outf << ptr->name();
407  outf->setf(ios::fixed, ios::floatfield);
408  outf->width(8);
409  *outf << (ptr->dur() * 1000.0) << "\t (0,140)" << endl;
410 
// NOTE(review): ptr is not advanced before this loop, so the head item is
// written again here; the loop stops on the item that has no successor.
411  for (; next(ptr) != 0; ptr = ptr->next())
412  {
413  outf->precision(3);
414  outf->setf(ios::left, ios::adjustfield);
415  outf->width(8);
416  *outf << ptr->name();
417  outf->setf(ios::fixed, ios::floatfield);
418  outf->width(8);
419  *outf << (ptr->dur() * 1000.0) << endl;
420  }
// Last line: final item, with the "(99,80)" suffix
421  // outf->precision(3);
422  // outf->setf(ios::left, ios::adjustfield);
423  outf->width(8);
424  *outf << ptr->name();
425  outf->setf(ios::fixed, ios::floatfield);
426  outf->width(8);
427  *outf << (ptr->dur() * 1000.0) << "\t (99,80)" << endl;
428 
429  if (outf != &cout)
430  delete outf;
431 
432  return write_ok;
433 }
434 
// Disabled code: this function sits inside the "#if 0" region and is not
// compiled.
//
// Writes the name of every item of the relation.  Names are separated by
// a newline when features is neither "" nor "OneLine", and by a single
// space otherwise; the last name is always followed by a newline.
//
// filename  output file name, or "-" for standard output
// a         relation whose item names are written
// features  selects the separator, as described above
//
// Returns misc_write_error if the output stream is not usable, else
// write_ok.
//
// NOTE(review): the loop condition calls next(ptr) on a.head() without a
// null check, and the stream allocated with new is not deleted on the
// error path.
435 EST_write_status save_label_names(const EST_String &filename,
436  const EST_Relation &a,
437  const EST_String &features)
438 {
439  EST_Stream_Item *ptr;
440 
441  ostream *outf;
442  if (filename == "-")
443  outf = &cout;
444  else
445  outf = new ofstream(filename);
446 
447  if (!(*outf))
448  {
449  cerr << "save_label_name: can't open label output file \""
450  << filename << "\"" << endl;
451  return misc_write_error;
452  }
453 
// All items except the last, each followed by the chosen separator
454  for (ptr = a.head(); next(ptr) != 0; ptr = ptr->next())
455  {
456  *outf << ptr->name();
457  if ((features != "") && (features != "OneLine"))
458  *outf << endl;
459  else
460  *outf << " ";
461  }
462 
// The last item always ends the line
463  *outf << ptr->name() << endl;
464 
465  if (outf != &cout)
466  delete outf;
467  return write_ok;
468 }
469 #endif
470 
472  const EST_RelationList &plist,
473  int time, int path)
474 {
475  EST_Litem *p;
476  EST_Item *ptr;
477  EST_String outname;
478  float start,end;
479 
480  ostream *outf;
481  if (filename == "-")
482  outf = &cout;
483  else
484  outf = new ofstream(filename);
485 
486  if (!(*outf))
487  {
488  cerr << "save_StreamList: can't open MLF output file \""
489  << filename << "\"\n";
490  return write_fail;
491  }
492 
493  *outf << "#!MLF!#\n"; // MLF header/identifier
494  outf->precision(6);
495 
496  start = end = 0;
497  for (p = plist.head(); p != 0; p = p->next())
498  {
499  outname = path ? plist(p).name() : basename(plist(p).name());
500  *outf << "\"*/" << outname<<"\"\n";
501  for (ptr = plist(p).head(); ptr != 0; ptr = ptr->next())
502  {
503  if (time)
504  {
505  outf->width(15);
506  cout.setf(ios::left,ios::adjustfield);
507  *outf << (int)(start * HTK_UNITS_PER_SECOND);
508  outf->width(15);
509  end = ptr->F("end",0.0);
510  *outf << (int)(end * HTK_UNITS_PER_SECOND) << " ";
511  start = end;
512  }
513  *outf << ptr->S("name","0") << endl;
514  }
515  *outf << ".\n";
516  }
517 
518  if (outf != &cout)
519  delete outf;
520  return write_ok;
521 }
522 
524  const EST_RelationList &plist,
525  int style)
526 {
527  EST_Litem *p;
528  EST_Item *ptr;
529 
530  ostream *outf;
531  if (filename == "-")
532  outf = &cout;
533  else
534  outf = new ofstream(filename);
535 
536  if (!(*outf))
537  {
538  cerr << "save:WordList: can't open WordList output file \""
539  << filename << "\"\n";
540  return write_fail;
541  }
542 
543  for (p = plist.head(); p != 0; p = p->next())
544  {
545  for (ptr = plist(p).head(); ptr->next() != 0; ptr = ptr->next())
546  {
547  *outf << ptr->name();
548  if (style == 0)
549  *outf << endl;
550  else
551  *outf << " ";
552  }
553  if (ptr != 0)
554  *outf << ptr->name() << endl;
555  }
556 
557  if (outf != &cout)
558  delete outf;
559  return write_ok;
560 }
561 
563  const EST_RelationList &plist,
564  const EST_String &features,
565  int path)
566 {
567  EST_Litem *p;
568  EST_String outname;
569  (void) filename;
570  (void) features;
571 
572  for (p = plist.head(); p != 0; p = p->next())
573  {
574  outname = path ? plist(p).name() : basename(plist(p).name());
575  if (plist(p).save(outname,false) != write_ok)
576  return misc_write_error;
577  }
578 
579  return write_ok;
580 }
581 
583  EST_RelationList &plist)
584 {
585  EST_TokenStream ts;
586  EST_String fns, name;
587 
588  if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
589  {
590  cerr << "Can't open label input file " << filename << endl;
591  return misc_read_error;
592  }
593  // set up the character constant values for this stream
594  ts.set_SingleCharSymbols(";");
595 
596  // Skip over header
597  if (ts.get().string() != "#!MLF!#")
598  {
599  cerr << "Not MLF file\n";
600  return wrong_format;
601  }
602 
603  while(!ts.eof())
604  {
605  // put filename in as stream name. The filename is usually surrounded
606  // by quotes, so remove these.
607  fns = ts.get().string();
608  strip_quotes(fns);
609  EST_Relation s(fns);
610  s.f.set("name", fns); // simonk
611  plist.append(s);
612 
613  if (read_label_portion(ts, plist.last(), 10000000) == misc_read_error)
614  {
615  cerr << "error: in reading MLF file\n";
616  cerr << "section for file " << fns <<
617  " at line " << ts.linenum() << " is badly formatted\n";
618 
619  return misc_read_error;
620  }
621  }
622 
623  return format_ok;
624 }
625 
626 static void pad_ends(EST_Relation &s, float length)
627 {
628  // add evenly spaced dummy end values to Relation
629  EST_Item *p;
630  int i;
631 
632  for (i = 0, p = s.head(); p; p = p->next(), ++i)
633  p->set("end",(length * float(i)/float(s.length())));
634 }
635 
637  EST_StrList &files, EST_Option &al)
638 {
639  EST_Litem *p, *plp;
640 
641  if (al.val("-itype", 0) == "mlf")
642  {
643  if (load_RelationList(files.first(), plist) != format_ok)
644  exit (-1);
645  }
646  else
647  for (p = files.head(); p; p = p->next())
648  {
649  EST_Relation s(files(p));
650  plist.append(s);
651  plp = plist.tail();
652  if (al.present("-itype"))
653  {
654  if (plist(plp).load(files(p), al.val("-itype")) != format_ok)
655  exit (-1);
656  }
657  else if (plist(plp).load(files(p)) != format_ok)
658  exit (-1);
659  if ((al.val("-itype", 0) == "words") && (al.present("-length")))
660  pad_ends(s, al.fval("-length"));
661 
662  }
663 
664  return format_ok;
665 }
EST_Item * head() const
Definition: EST_Relation.h:121
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:499
float end(const EST_Item &item)
Definition: EST_item_aux.cc:96
const EST_String name() const
Definition: EST_Item.h:250
EST_write_status
#define HTK_UNITS_PER_SECOND
Definition: relation_io.cc:54
void set(const EST_String &name, ssize_t ival)
Definition: EST_Item.h:185
A Regular expression class to go with the CSTR EST_String class.
Definition: EST_Regex.h:56
STATIC void left(STATUS Change)
Definition: editline.c:523
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
Definition: EST_Token.h:344
bool save(Lattice &lattice, EST_String filename)
const T & last() const
return const reference to last item in list
Definition: EST_TList.h:155
const EST_String filename() const
The originating filename (if there is one)
Definition: EST_Token.h:378
EST_read_status load(EST_TokenStream &ts)
load features from already opened EST_TokenStream
bool load(Lattice &lattice, EST_String filename)
const EST_String & whitespace()
Definition: EST_Token.h:112
int length() const
float fval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:104
void set(const EST_String &name, int ival)
Definition: EST_Features.h:186
EST_UItem * next()
Definition: EST_UList.h:55
int open(const EST_String &filename)
open a EST_TokenStream for a file.
Definition: EST_Token.cc:213
int eof()
end of file
Definition: EST_Token.h:362
EST_read_status load_esps_label(EST_TokenStream &ts, EST_Relation &rel)
Definition: relation_io.cc:61
void set_name(const EST_String &name) const
Definition: EST_Item.h:254
int length() const
Definition: EST_Features.h:251
EST_write_status save_RelationList(const EST_String &filename, const EST_RelationList &plist, int time, int path)
Definition: relation_io.cc:471
#define misc_write_error
EST_write_status save_WordList(const EST_String &filename, const EST_RelationList &plist, int style)
Definition: relation_io.cc:523
float F(const EST_String &name) const
Definition: EST_Item.h:135
EST_read_status load_words_label(EST_TokenStream &ts, EST_Relation &s)
Definition: relation_io.cc:240
EST_Regex RXwhite("[ \n\t\r]+")
White space.
void evaluate(EST_Item *a, EST_Features &f)
Definition: EST_Item.cc:636
int linenum(void) const
returns line number of EST_TokenStream
Definition: EST_Token.h:360
The file was written successfully.
#define wrong_format
void remove(const EST_String &name)
Definition: EST_Features.h:247
EST_read_status load_ogi_label(EST_TokenStream &ts, EST_Relation &s)
Definition: relation_io.cc:192
#define misc_read_error
float time(const EST_Item &item)
Definition: EST_item_aux.cc:82
NULL
Definition: EST_WFST.cc:55
int present(const EST_String &name) const
int col(void) const
Line position in original EST_TokenStream.
Definition: EST_Token.h:187
const T & first() const
return const reference to first item in list
Definition: EST_TList.h:152
EST_Token & peek(void)
peek at next token
Definition: EST_Token.h:332
EST_Features & features() const
Definition: EST_Item.h:258
EST_write_status save(ostream &outf) const
save features in already opened ostream
void set_quotes(char q, char e)
set characters to be used as quotes and escape, and set quote mode
Definition: EST_Token.h:353
int matches(const char *e, ssize_t pos=0) const
Exactly match this string?
Definition: EST_String.cc:651
getString int
Definition: EST_item_aux.cc:50
The file was not written successfully.
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
EST_UItem * tail() const
Definition: EST_UList.h:99
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
EST_String basename(EST_String full, EST_String ext="")
This acts like the bourne shell basename command. By default, it strips any leading path from a strin...
Definition: util_io.cc:167
EST_read_status load_RelationList(const EST_String &filename, EST_RelationList &plist)
Definition: relation_io.cc:582
EST_Item * next() const
Definition: EST_Item.h:348
EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s, int sample)
Definition: relation_io.cc:284
EST_read_status
float start(const EST_Item &item)
Definition: EST_item_aux.cc:52
EST_Features f
Definition: EST_Relation.h:101
size_t length(void) const
Length of string ({not} length of underlying chunk)
Definition: EST_String.h:231
EST_write_status save_esps_label(const EST_String &filename, const EST_Relation &s, bool evaluate_ff)
Definition: relation_io.cc:109
const EST_String & string() const
Definition: EST_Token.h:120
void begin(const Container &over)
Set the iterator ready to run over this container.
#define format_ok
int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
EST_write_status save_htk_label(const EST_String &filename, const EST_Relation &a)
Definition: relation_io.cc:334
EST_Token get_upto_eoln(void)
Get everything up to the end of the line as a single token.
Definition: EST_Token.cc:529
void strip_quotes(EST_String &s, const EST_String quote_char="\"")
remove quotes from a string
Definition: util_io.cc:191
EST_UItem * head() const
Definition: EST_UList.h:97
EST_write_status save_ind_RelationList(const EST_String &filename, const EST_RelationList &plist, const EST_String &features, int path)
Definition: relation_io.cc:562
const EST_String S(const EST_String &name) const
Definition: EST_Item.h:144
EST_read_status read_RelationList(EST_RelationList &plist, EST_StrList &files, EST_Option &al)
Definition: relation_io.cc:636
EST_Item * append(EST_Item *si)
Definition: EST_Relation.cc:88
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition: EST_String.h:308
EST_read_status load_sample_label(EST_TokenStream &ts, EST_Relation &s, int sample)
Definition: relation_io.cc:314
int eoln()
end of line
Definition: EST_Token.cc:832
Utility EST_String Functions header file.
#define DEF_SAMPLE_RATE
Definition: relation_io.cc:53