Edinburgh Speech Tools  2.1-release
EST_FeatureData.cc
Go to the documentation of this file.
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /************************************************************************/
33 /* */
34 /* Author: Paul Taylor Caley */
35 /* Date: July 1998 */
36 /* -------------------------------------------------------------------- */
37 /* Feature Data Class */
38 /* */
39 /************************************************************************/
40 
41 #include "EST_TMatrix.h"
42 #include "EST_Val.h"
43 #include "EST_FeatureData.h"
44 #include "EST_string_aux.h"
45 #include "EST_Token.h"
46 #include "EST_FileType.h"
47 #include "EST_error.h"
48 #include <iostream>
49 #include <fstream>
50 
51 #include "EST_THash.h"
52 
53 using namespace std;
54 
// NOTE(review): the definition header for this body (listing line 55) is
// missing from this listing; presumably the EST_FeatureData default
// constructor -- confirm against EST_FeatureData.h.
// The body only calls default_vals().
56 {
57  default_vals();
58 }
59 
60 
61 
// NOTE(review): the definition header for this body (listing line 62) is
// missing from this listing; presumably the copy constructor taking
// `const EST_FeatureData &a` -- confirm against EST_FeatureData.h.
// Calls default_vals() and then copy(a).
63 {
64  default_vals();
65  copy(a);
66 }
67 
// NOTE(review): the definition header for this body (listing line 68) is
// missing from this listing; presumably the destructor -- confirm.
// The body is empty.
69 {
70 }
71 
// NOTE(review): the definition header (listing line 72) is missing from this
// listing; presumably `ssize_t EST_FeatureData::num_samples() const` --
// confirm against EST_FeatureData.h.
// Returns the row count of the underlying matrix `fd`.
73 {
74  return fd.num_rows();
75 }
76 
// NOTE(review): the definition header (listing line 77) is missing from this
// listing; presumably `ssize_t EST_FeatureData::num_features() const` --
// confirm against EST_FeatureData.h.
// Returns the column count of the underlying matrix `fd`.
78 {
79  return fd.num_columns();
80 }
81 
82 
83 void EST_FeatureData::default_vals()
84 {
85 /* cout << "Default values\n";
86  p_sub_fd = false;
87  p_info = new EST_FeatureInfo;
88 */
89 }
90 
91 void EST_FeatureData::set_num_samples(ssize_t num_samples, bool preserve)
92 {
93  fd.resize(num_samples, fd.num_columns(), preserve);
94 }
95 
96 void EST_FeatureData::resize(ssize_t num_samples, ssize_t num_features, bool preserve)
97 {
98  // If enlargement is required, give new features dummy names
99  // and set their types to <STRING>. If preserve is set to 0
100  // rename all features this way.
101 
102  if (num_features > fd.num_columns())
103  {
104  ssize_t i;
105  if (preserve)
106  i = fd.num_columns();
107  else
108  i = 0;
109  for (; i < num_features; ++i)
110  info.set("unnamed_" + itoString(i), "<STRING>");
111  }
112 
113  fd.resize(num_samples, num_features, preserve);
114 }
115 
116 void EST_FeatureData::resize(ssize_t num_samples, EST_Features &f, bool preserve)
117 {
118  fd.resize(num_samples, f.length(), preserve);
119  info = f;
120 }
121 
// NOTE(review): the definition header (listing line 122) is missing from this
// listing; presumably
// `EST_String EST_FeatureData::type(const EST_String &feature_name)` --
// confirm against EST_FeatureData.h.
//
// Looks up the string stored in `info` for feature_name.  If the
// contains("<", 0) test on it succeeds, that string is returned as the type;
// otherwise "undef" is returned.
123 {
124  EST_String t = info.S(feature_name);
125 
126  if (t.contains("<", 0)) // i.e. a predefined type
127  return t;
128 
129  return "undef";
130 }
131 
// NOTE(review): the definition header (listing line 132) is missing from this
// listing; presumably
// `EST_StrList EST_FeatureData::values(const EST_String &feature_name)` --
// confirm against EST_FeatureData.h.
//
// Returns the list of values recorded in `info` for feature_name.  For the
// open-ended types <FLOAT>, <INT> and <STRING> an empty list is returned;
// anything else is split into a list with StringtoStrList().
// NOTE(review): <BOOL> is not in the open-ended test here -- confirm whether
// that is intended.
133 {
134  EST_StrList v;
135  EST_String t = info.S(feature_name);
136 
137  // check for infinite set:
138  if ((t == "<FLOAT>") || (t == "<INT>") || (t == "<STRING>"))
139  return v;
140 
141  StringtoStrList(t, v);
142  return v;
143 }
144 
// NOTE(review): two lines are missing from this listing: the definition
// header (listing line 145), presumably
// `int EST_FeatureData::feature_position(const EST_String &feature_name)`,
// and the declaration of the iterator `p` (listing line 149) -- confirm
// both against the real file.
//
// Walks the entries of `info` in order, counting from 0, and returns the
// count at the first entry whose key equals feature_name.  If none matches,
// EST_error() is called; should that return, 0 is returned.
146 {
147  int i;
148 
150 
151  for (i = 0, p.begin(info); p; ++p, ++i)
152  {
153 // cout << "looking at " << info.fname(p) << endl;
154 // cout << "i = " << i << endl;
155  if (p->k == feature_name)
156  return i;
157  }
158 
159  EST_error("No such feature %s\n", (const char *) feature_name);
160  return 0;
161 }
162 
// Recompute the value set recorded in `info` for one feature.
//
// Every sample's value in the feature's column is entered, in string form,
// as a key of `values`.  If the resulting number of entries exceeds `max`
// the feature is recorded as the open set "<STRING>"; otherwise it is
// recorded as the keys of `values`, each followed by a space.  Returns the
// number of entries in `values`.
//
// NOTE(review): the declarations of `values` (listing line 168) and of the
// iterator `p` (listing line 184) are missing from this listing.  `values`
// is presumably a keyed container in which repeated keys collapse -- confirm
// against the real file.
163 int EST_FeatureData::update_values(const EST_String &feature_name, int max)
164 {
165  // This should be converted back to Hash tables once extra
166  // iteration functions are added to EST_Hash.
167  ssize_t i, col;
169  EST_String v;
170 
171 // EST_TStringHash<int> values(max);
172 
173  col = feature_position(feature_name);
174 
175  for (i = 0; i < num_samples(); ++i)
176  values.set(fd.a(i, col).string(), 1);
177 
178  // check to see if there are more types than allowed, if so
179  // just set to open set STRING
180  if (values.length() > max)
181  v = "<STRING>";
182  else
183  {
185  for(p.begin(values); p; ++p)
186  v += p->k + " ";
187  }
188 
189  info.set(feature_name, v);
190 
191  return values.length();
192 }
193 
194 EST_FeatureData & EST_FeatureData::copy(const EST_FeatureData &a)
195 {
196  (void) a;
197 /* // copy on a sub can't alter header information
198  if (!p_sub_fd)
199  {
200  delete p_info;
201  *p_info = *(a.p_info);
202  }
203  // but data can be copied so long as no resizing is involved.
204  EST_ValMatrix::operator=(a);
205 */
206  return *this;
207 }
208 
209 /*void EST_FeatureData::a(int i, int j)
210 {
211  return EST_ValMatrix::a(i, j);
212 }
213 */
214 /*
215 EST_Val &EST_FeatureData::operator()(int i, int j)
216 {
217  return a(i, j);
218 }
219 
220 EST_Val &EST_FeatureData::operator()(int s, const EST_String &f)
221 {
222  int i = info().field_index(f);
223  return a(s, i);
224 }
225 
226 EST_FeatureData &EST_FeatureData::operator=(const EST_FeatureData &f)
227 {
228  return copy(f);
229 }
230 
231 */
// NOTE(review): the definition header (listing line 232) is missing from this
// listing; presumably an `a(i, f)` accessor taking a sample index and a
// feature name -- confirm against EST_FeatureData.h.
// NOTE(review): `f` is ignored and column 0 of sample i is always returned;
// a lookup of the column for `f` looks intended -- confirm.
233 {
234  (void)f;
235  return fd.a(i, 0);
236 }
237 
// NOTE(review): the definition header (listing line 238) is missing from this
// listing; presumably an `a(i, j)` accessor taking sample and feature
// indices -- confirm against EST_FeatureData.h.
// Returns element (i, j) of the underlying matrix `fd`.
239 {
240  return fd.a(i, j);
241 }
// NOTE(review): the definition header (listing line 242) is missing from this
// listing; presumably the const overload of the `a(i, f)` accessor --
// confirm against EST_FeatureData.h.
// NOTE(review): `f` is ignored and column 0 of sample i is always returned;
// a lookup of the column for `f` looks intended -- confirm.
243 {
244  (void)f;
245  return fd.a(i, 0);
246 }
247 
// NOTE(review): the definition header (listing line 248) is missing from this
// listing; presumably the const overload of the `a(i, j)` accessor --
// confirm against EST_FeatureData.h.
// Returns element (i, j) of the underlying matrix `fd`.
249 {
250  return fd.a(i, j);
251 }
252 
253 
254 /*
255 void EST_FeatureData::sub_samples(EST_FeatureData &f, int start, int num)
256 {
257  sub_matrix(f, start, num);
258  f.p_info = p_info;
259  f.p_sub_fd = true;
260 }
261 
262 void EST_FeatureData::extract_named_fields(const EST_String &fields)
263 {
264  EST_FeatureData n;
265  // there must be a more efficient way than a copy?
266  extract_named_fields(n, fields);
267  *this = n;
268 }
269 
270 void EST_FeatureData::extract_named_fields(const EST_StrList &fields)
271 {
272  EST_FeatureData n;
273  // there must be a more efficient way than a copy?
274  extract_named_fields(n, fields);
275  *this = n;
276 }
277 
278 void EST_FeatureData::extract_numbered_fields(const EST_String &fields)
279 {
280  EST_FeatureData n;
281  // there must be a more efficient way than a copy?
282  extract_numbered_fields(n, fields);
283  *this = n;
284 }
285 
286 void EST_FeatureData::extract_numbered_fields(const EST_IList &fields)
287 {
288  EST_FeatureData n;
289  // there must be a more efficient way than a copy?
290  extract_numbered_fields(n, fields);
291  *this = n;
292 }
293 
294 
295 void EST_FeatureData::extract_named_fields(EST_FeatureData &f,
296  const EST_String &fields) const
297 {
298  EST_StrList s;
299 
300  StringtoStrList(fields, s);
301  extract_named_fields(f, s);
302 }
303 void EST_FeatureData::extract_named_fields(EST_FeatureData &f,
304  const EST_StrList &n_fields) const
305 {
306  EST_Litem *p;
307  EST_StrList n_types;
308  int i, j;
309 
310  info().extract_named_fields(*(f.p_info), n_fields);
311 
312  for (p = n_fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
313  for (j = 0; j < f.num_samples(); ++j)
314  f(j, i) = a(j, n_fields(p));
315 
316 }
317 
318 void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f,
319  const EST_IList &fields) const
320 {
321  EST_Litem *p;
322  EST_StrList n_fields;
323  int i, j;
324 
325  for (p = fields.head(); p; p = p->next())
326  n_fields.append(info().field_name(fields(p)));
327 
328  info().extract_named_fields(*(f.p_info), n_fields);
329 
330  for (p = fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
331  for (j = 0; j < f.num_samples(); ++j)
332  f(j, i) = a(j, fields(p));
333 
334 }
335 
336 void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f,
337  const EST_String &fields) const
338 {
339  EST_StrList s;
340  EST_IList il;
341 
342  StringtoStrList(fields, s);
343  StrListtoIList(s, il);
344  extract_numbered_fields(f, il);
345 }
346 */
347 
// NOTE(review): the definition header (listing line 348) is missing from this
// listing; presumably
// `EST_write_status save_est(const EST_FeatureData &f, const EST_String &filename)`
// -- confirm against the real file.
//
// NOTE(review): the whole writer is commented out, so this function ignores
// both arguments, writes nothing, and still returns write_ok.  Callers are
// told the save succeeded -- confirm whether that is intended.
349 {
350  (void)f;
351  (void)filename;
352 /*
353  ostream *outf;
354  EST_Litem *s, *e;
355  int i;
356  if (filename == "-")
357  outf = &cout;
358  else
359  outf = new ofstream(filename);
360 
361  if (!(*outf))
362  return write_fail;
363 
364  outf->precision(5);
365  outf->setf(ios::fixed, ios::floatfield);
366  outf->width(8);
367 
368  *outf << "EST_File feature_data\n"; // EST header identifier
369  *outf << "DataType ascii\n";
370  *outf << "NumSamples " << f.num_samples() << endl;
371  *outf << "NumFields " << f.num_fields() << endl;
372  *outf << "FieldNames " << f.info().field_names();
373  *outf << "FieldTypes " << f.info().field_types();
374  if (f.info().group_start.length() > 0)
375  for (s = f.info().group_start.head(), e = f.info().group_end.head();
376  s; s = s->next(), e = e->next())
377  *outf << "Group " << f.info().group_start.key(s) << " " <<
378  f.info().group_start.val(s) << " " << f.info().group_end.val(e) << endl;
379 
380  for (i = 0; i < f.num_fields(); ++i)
381  if (f.info().field_values(i).length() > 0)
382  *outf << "Field_" << i << "_Values "
383  << f.info().field_values(i) << endl;
384 
385  *outf << "EST_Header_End\n"; // EST end of header identifier
386 
387 // *outf << ((EST_ValMatrix ) f);
388  *outf << f;
389  */
390 
391  return write_ok;
392 }
393 
394 
396  const EST_String &file_type) const
397 {
398  if ((file_type == "est") || (file_type == ""))
399  return save_est(*this, filename);
400 /* else if (file_type = "octave")
401  return save_octave(*this, filename);
402  else if (file_type = "ascii")
403  return save_ascii(*this, filename);
404 */
405 
406  cerr << "Can't save feature data in format \"" << file_type << endl;
407  return write_fail;
408 }
409 
410 
411 
413 {
414  int i, j;
415  EST_Option hinfo;
416  EST_String k, v;
417  EST_read_status r;
418  bool ascii;
419  EST_TokenStream ts;
420  EST_EstFileType t;
421  int ns, nf;
422 
423  if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
424  {
425  cerr << "Can't open track file " << filename << endl;
426  return misc_read_error;
427  }
428  // set up the character constant values for this stream
429  ts.set_SingleCharSymbols(";");
430  ts.set_quotes('"','\\');
431 
432  if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
433  {
434  cerr << "Error reading est header of file " << filename << endl;
435  return r;
436  }
437 
438  if (t != est_file_feature_data)
439  {
440  cerr << "Not a EST Feature Data file: " << filename << endl;
441  return misc_read_error;
442  }
443 
444  ns = hinfo.ival("NumSamples");
445  nf = hinfo.ival("NumFeatures");
446 
447  cout << "ns: " << ns << endl;
448  cout << "nf: " << nf << endl;
449  resize(ns, nf);
450 
451  info.clear(); // because resize will make default names
452 
453  for (i = 0; i < nf; ++i)
454  {
455  k = "Feature_" + itoString(i+1);
456  if (hinfo.present(k))
457  {
458  v = hinfo.val(k);
459  info.set(v.before(" "), v.after(" "));
460  cout << "value: " << v.after(" ") << endl;
461  }
462  else
463  EST_error("No feature definition given for feature %d\n", i);
464  }
465 
466  for (i = 0; i < ns; ++i)
467  {
469  for (p.begin(info), j = 0; j < nf; ++j, ++p)
470  {
471  if (p->k == "<FLOAT>")
472  a(i, j) = atof(ts.get().string());
473  else if (p->k == "<BOOL>")
474  a(i, j) = atoi(ts.get().string());
475  else if (p->k == "<INT>")
476  a(i, j) = atoi(ts.get().string());
477  else
478  a(i, j) = ts.get().string();
479  }
480  }
481 
482  return format_ok;
483 }
484 
485 /*ostream& operator << (ostream &st, const EST_FeatureInfo &a)
486 {
487 
488 // st << a.field_names() << endl;
489 // st << a.field_types() << endl;
490 
491  return st;
492 }
493 */
494 
495 ostream& operator << (ostream &st, const EST_FeatureData &d)
496 {
497  ssize_t i, j;
498  EST_String t;
499  EST_Val v;
500 
501 // st << a;
502 
503 // EST_ValMatrix::operator<<(st, (EST_ValMatrix)a);
504 
505  for (i = 0; i < d.num_samples(); ++i)
506  {
507  for (j = 0; j < d.num_features(); ++j)
508  {
509  v = d.a(i, j);
510  st << v << " ";
511 // cout << "field type " << a.info().field_type(j) << endl;
512 /* else if (a.info().field_type(j) == "float")
513  st << a.a(i, j);
514  else if (a.info().field_type(j) == "int")
515  st << a.a(i, j);
516 
517  else if (a.info().field_type(j) == "string")
518  {
519  // st << "\"" << a.a(i, j) << "\"";
520  t = a.a(i, j);
521  t.gsub(" ", "_");
522  st << t;
523  }
524 */
525  }
526  st << endl;
527  }
528 
529  return st;
530 }
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:499
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
Definition: EST_String.h:365
EST_write_status
int ival(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:82
EST_EstFileType
Definition: EST_FileType.h:50
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
Definition: EST_Token.h:344
EST_StrList values(const EST_String &feature_name)
EST_String values
int feature_position(const EST_String &feature_name)
EST_String itoString(int n)
Make a EST_String object from an integer.
Definition: util_io.cc:141
void set(const EST_String &name, int ival)
Definition: EST_Features.h:186
int ssize_t
void resize(ssize_t num_samples, ssize_t num_columns, bool preserve=1)
EST_write_status save_est(const EST_FeatureData &f, const EST_String &filename)
int open(const EST_String &filename)
open a EST_TokenStream for a file.
Definition: EST_Token.cc:213
EST_write_status save(const EST_String &name, const EST_String &EST_filetype="") const
float max(float a, float b)
Definition: EST_cluster.cc:143
void set_num_samples(ssize_t num_samples, bool preserve=1)
int length() const
Definition: EST_Features.h:251
void StringtoStrList(EST_String s, EST_StrList &l, EST_String sep)
Convert a EST_String to a EST_StrList by separating tokens in s delimited by the separator sep...
The file was written successfully.
EST_read_status read_est_header(EST_TokenStream &ts, EST_Option &hinfo, bool &ascii, EST_EstFileType &t)
Definition: est_file.cc:143
#define misc_read_error
EST_TVector< T > & copy(EST_TVector< T > a, const EST_TList< T > &in)
#define EST_error
Definition: EST_error.h:104
f
Definition: EST_item_aux.cc:48
void set_quotes(char q, char e)
set characters to be used as quotes and escape, and set quote mode
Definition: EST_Token.h:353
EST_read_status load(const EST_String &name)
The file was not written successfully.
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
EST_read_status
ssize_t num_samples() const
ssize_t num_features() const
void begin(const Container &over)
Set the iterator ready to run over this container.
#define format_ok
int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition: EST_String.h:308
EST_String before(int pos, int len=0) const
Part before position.
Definition: EST_String.h:276
EST_Val & a(ssize_t sample, ssize_t field)
EST_String type(const EST_String &feature_name)
Utility EST_String Functions header file.
int update_values(const EST_String &feature_name, int max)
ostream & operator<<(ostream &st, const EST_FeatureData &d)