Edinburgh Speech Tools  2.1-release
EST_features_io.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1998 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : March 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* Features i/o */
37 /* This is kept separate from EST_Features to help reduce dependencies */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include "EST_Features.h"
41 #include "ling_class/EST_Item.h"
42 #include "EST_error.h"
43 #include "EST_String.h"
44 #include "EST_Token.h"
45 
46 using namespace std;
47 
49  const EST_String &funcname)
50 {
51  EST_Item_featfunc f = get_featfunc(funcname,1);
52 
53  set_path(name, est_val(f));
54 }
55 
56 void EST_Features::save_fpair(ostream &outf,
57  const EST_String &fname,
58  const EST_Val &fvalue) const
59 {
60  /* Feature valued features themselves (so can't denot empty ones) */
61  if (fvalue.type() == val_type_feats)
62  {
63  EST_Features *f = feats(fvalue);
64  if (f->features->list.head() == 0)
65  {
66  // An empty feature set
67  outf << fname << " () ; ";
68  }
69  else
70  for (EST_Litem *q=f->features->list.head();
71  q != 0; q=q->next() )
72  save_fpair(outf,
73  fname+"."+f->features->list(q).k,
74  f->features->list(q).v);
75  return;
76  }
77  /* a non feature valued one */
78  // in case someone has () in their feature names (ought to be shot)
79  if (fname.contains("(") ||
80  fname.contains(")") ||
81  fname.contains(" ") || // bang, bang
82  fname.contains("\t") || // what smoking gun ?
83  fname.contains(";") ||
84  (fname == ""))
85  outf << quote_string(fname,"\"","\\",1) << " ";
86  else
87  outf << fname << " ";
88  if (fvalue == ";")
89  outf << "\";\"";
90  else if ((fvalue.type() == val_string) &&
91  ((fvalue.string().matches(RXint)) ||
92  (fvalue.string().matches(RXdouble)) ||
93  (fvalue.string().contains("(")) ||
94  (fvalue.string().contains(")")) ||
95  (fvalue.string().contains(";")) ))
96  // force quoting, cause it looks like a number but isn't
97  outf << quote_string(fvalue.string(),"\"","\\",1);
98  else if (fvalue.type() == val_float)
99  {
100  char b[20];
101  sprintf(b,"%g",fvalue.Float());
102  outf << b;
103  }
104  else if (fvalue.type() == val_type_featfunc)
105  {
106  outf << "F:"<<get_featname(featfunc(fvalue));
107  }
108  else
109  outf << quote_string(fvalue.string());
110  outf << " ; ";
111 }
112 
114 {
115  // Save features
116  if (features->list.head() == 0)
117  outf << "()";
118  else
119  for (EST_Litem *p=features->list.head(); p != 0; p=p->next() )
120  save_fpair(outf,
121  features->list(p).k,
122  features->list(p).v);
123 
124  return write_ok;
125 }
126 
128 {
129  // Save features as an sexpression
130  outf << "(";
131  for (EST_Litem *p=features->list.head(); p != 0; p=p->next() )
132  {
133  const EST_String &fname = features->list(p).k;
134  const EST_Val &fvalue = features->list(p).v;
135  outf << "(";
136  // in case someone has () in their feature names (ought to be shot)
137  if (fname.contains("(") ||
138  fname.contains(")") ||
139  fname.contains(" ") ||
140  fname.contains("\t") ||
141  fname.contains(";"))
142  outf << quote_string(fname,"\"","\\",1);
143  else
144  outf << fname;
145  outf << " ";
146  if (fvalue == ";")
147  outf << "\";\"";
148  else if ((fvalue.type() == val_string) &&
149  ((fvalue.string().matches(RXint)) ||
150  (fvalue.string().matches(RXdouble)) ||
151  (fvalue.string().contains("(")) ||
152  (fvalue.string().contains(")"))))
153  // force quoting, cause it looks like a number but isn't
154  // or contains a paren
155  outf << quote_string(fvalue.string(),"\"","\\",1);
156  else if (fvalue.type() == val_float)
157  {
158  char b[20];
159  sprintf(b,"%g",fvalue.Float());
160  outf << b;
161  }
162  else if (fvalue.type() == val_type_featfunc)
163  {
164  outf << "F:"<<get_featname(featfunc(fvalue));
165  }
166  else if (fvalue.type() == val_type_feats)
167  {
168  feats(fvalue)->save_sexpr(outf);
169  }
170  else
171  outf << quote_string(fvalue.string());
172  outf << ")";
173  if (p->next())
174  outf << " ";
175  }
176  outf << ")";
177 
178  return write_ok;
179 }
180 
182 {
183  /* Load in feature structure from sexpression */
184 
185  if (ts.peek() != "(")
186  {
187  cerr << "load_features: no sexpression found\n";
188  return misc_read_error;
189  }
190  else
191  {
192  EST_String f;
193  EST_Token v;
194  ts.get(); /* skip opening paren */
195  for (; ts.peek() != ")"; )
196  {
197  if (ts.peek() != "(")
198  {
199  cerr << "load_features: no sexpression found\n";
200  return misc_read_error;
201  }
202  ts.get();
203  f = ts.get().string(); /* feature name */
204  if ((ts.peek() == "(") && (ts.peek().quoted() == FALSE))
205  {
206  EST_Features fv;
207  set(f,fv);
208  A(f).load_sexpr(ts);
209  }
210  else
211  {
212  v = ts.get();
213  if (v.quoted())
214  set(f,v.string());
215  else if (v.string().matches(RXint))
216  set(f,atoi(v.string()));
217  else if (v.string().matches(RXdouble))
218  set(f,atof(v.string()));
219  else if (v.string().contains("F:"))
220  {
221  EST_Item_featfunc func =
222  get_featfunc(v.string().after("F:"));
223  if (func != NULL)
224  set_val(f,est_val(func));
225  else
226  {
227  cerr << "load_features: Unknown Function '" << f <<"'\n";
228  set_val(f,feature_default_value);
229  }
230  }
231  else
232  set(f,v.string());
233 
234  }
235  if (ts.get() != ")")
236  {
237  cerr << "load_features: no sexpression found\n";
238  return misc_read_error;
239  }
240  }
241  if (ts.get() != ")")
242  {
243  cerr << "load_features: no sexpression found\n";
244  return misc_read_error;
245  }
246  }
247  return format_ok;
248 }
249 
251 {
252  // load features from here to end of line separated by semicolons
253  EST_String f;
254  EST_Token v;
255  static EST_Val val0 = EST_Val(0);
256 
257  while (!ts.eoln())
258  {
259  if (ts.eof())
260  {
261  cerr << "load_features: unexpected end of file\n";
262  return misc_read_error;
263  }
264  f = ts.get().string();
265  v = EST_String::Empty;
266  while (((ts.peek() != ";") || (ts.peek().quoted())) &&
267  (!ts.eof()) && (!ts.eoln()))
268  if (v == "")
269  v = ts.get();
270  else
271  v = v.string()
272  + ts.peek().whitespace()
273  + ts.get().string();
274  if (v.quoted() || (v.string() == ""))
275  set_path(f,EST_Val(v.string()));
276  else if (v.string() == "0") // very common cases for speed
277  set_path(f,val0);
278  else if ((strchr("0123456789-.",v.string()(0)) != NULL) &&
279  (v.string().matches(RXdouble)))
280  {
281  if (v.string().matches(RXint))
282  set_path(f, EST_Val(atoi(v.string())));
283  else
284  set_path(f, EST_Val(atof(v.string())));
285  }
286  else if (v.string().contains("F:"))
287  {
288  EST_Item_featfunc func = get_featfunc(v.string().after("F:"));
289  if (func != NULL)
290  set_path(f, est_val(func));
291  else
292  {
293  cerr << "load_features: Unknown Function '" << f <<"'\n";
294  set_path(f, feature_default_value);
295  }
296  }
297  else if (v.string() == "()")
298  { // An empty feature set
299  EST_Features *fs = new EST_Features;
300  set_path(f,est_val(fs));
301  }
302  else if (v != "<contents>") // unsupported type
303  set_path(f,EST_Val(v.string()));
304  if (ts.peek() == ";")
305  ts.get();
306  else if (!ts.eoln())
307  {
308  cerr << "load_features: " << ts.pos_description() <<
309  " missing semicolon in feature list\n";
310  return misc_read_error;
311  }
312  }
313  return format_ok;
314 }
315 
EST_write_status save_sexpr(ostream &outf) const
save features as s-expression in already opened ostream
EST_Item_featfunc featfunc(const EST_Val &v)
Definition: item_feats.cc:100
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:499
void set_function(const EST_String &name, const EST_String &f)
EST_Val(* EST_Item_featfunc)(class EST_Item *)
Definition: EST_Features.h:53
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
Definition: EST_String.h:365
EST_read_status load_sexpr(EST_TokenStream &ts)
load features from sexpression, contained in already opened EST_TokenStream
EST_write_status
EST_Val est_val(const EST_Item_featfunc f)
Definition: item_feats.cc:122
val_type val_string
Definition: EST_Val.cc:46
EST_String get_featname(const EST_Item_featfunc func)
Definition: item_feats.cc:76
EST_read_status load(EST_TokenStream &ts)
load features from already opened EST_TokenStream
EST_Regex RXdouble("-?\\(\\([0-9]+\\.[0-9]*\\)\\|\\([0-9]+\\)\\|\\(\\.[0-9]+\\)\\)\\([eE][---+]?[0-9]+\\)?")
Floating point number.
const EST_String & whitespace()
Definition: EST_Token.h:112
int quoted() const
TRUE if token was quoted.
Definition: EST_Token.h:172
EST_UItem * next()
Definition: EST_UList.h:55
val_type val_float
Definition: EST_Val.cc:45
EST_Regex RXint("-?[0-9]+")
Integer.
EST_TKVL< EST_String, EST_Val > * features
Definition: EST_Features.h:65
int eof()
end of file
Definition: EST_Token.h:362
val_type val_type_featfunc
Definition: item_feats.cc:99
The file was written successfully.
EST_TList< EST_TKVI< K, V > > list
Linked list of key-val pairs. Don't use this as it will be made private in the future.
Definition: EST_TKVL.h:94
val_type type(void) const
Definition: EST_Val.h:137
#define FALSE
Definition: EST_bool.h:119
#define misc_read_error
NULL
Definition: EST_WFST.cc:55
f
Definition: EST_item_aux.cc:48
EST_Token & peek(void)
peek at next token
Definition: EST_Token.h:332
const EST_String & string(void) const
Definition: EST_Val.h:161
EST_write_status save(ostream &outf) const
save features in already opened ostream
int matches(const char *e, ssize_t pos=0) const
Exactly match this string?
Definition: EST_String.cc:651
EST_read_status
const EST_String & string() const
Definition: EST_Token.h:120
#define format_ok
const EST_String pos_description()
A string describing current position, suitable for error messages.
Definition: EST_Token.cc:882
EST_Item_featfunc get_featfunc(const EST_String &name, int must=0)
Definition: item_feats.cc:58
EST_UItem * head() const
Definition: EST_UList.h:97
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition: EST_String.h:308
EST_String quote_string(const EST_String &s, const EST_String &quote, const EST_String &escape, int force)
Definition: EST_Token.cc:844
static const EST_String Empty
Constant empty string.
Definition: EST_String.h:110
void save_fpair(ostream &outf, const EST_String &fname, const EST_Val &fvalue) const
int eoln()
end of line
Definition: EST_Token.cc:832
float Float(void) const
Definition: EST_Val.h:149