Edinburgh Speech Tools  2.1-release
sig2fv_main.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Authors: Paul Taylor and Simon King */
34 /* Date : April 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Generate feature vectors */
37 /* */
38 /*=======================================================================*/
39 
40 #include <cstdlib>
41 #include "EST_speech_class.h"
42 #include "EST_string_aux.h"
43 #include "EST_cmd_line.h"
44 #include "EST_cmd_line_options.h"
45 #include "sigpr/EST_sigpr_utt.h"
46 #include "sigpr/EST_filter.h"
47 
48 using namespace std;
49 
50 #define EPSILON (0.0001)
51 
52 #define DEFAULT_FRAME_SIZE 0.01
53 #define DEFAULT_FRAME_FACTOR 2.0
54 #define DEFAULT_LPC_ORDER 16
55 #define DEFAULT_REF_ORDER 16
56 #define DEFAULT_CEP_ORDER 12
57 #define DEFAULT_FBANK_ORDER 20
58 #define DEFAULT_MELCEP_ORDER 12
59 #define DEFAULT_WINDOW "hamming"
60 #define DEFAULT_PREEMPH 0
61 #define DEFAULT_LIFTER 0
62 
63 
64 // sane values for pitchmarks (in seconds)
65 
66 #define MINIMUM_PITCH_PERIOD (0.0033) // 300 hz
67 #define MAXIMUM_PITCH_PERIOD (0.02) // 50 Hz
68 #define DEFAULT_PITCH_PERIOD (0.01) // 100 Hz
69 
70 void calculate_orders(EST_StrList &clist, EST_IList &olist,
71  EST_Option &op);
72 
74  EST_Features &op, int order);
75 
76 void set_options(EST_Features &op, EST_Option &al);
77 
79 {
80  return
81  EST_String("")+
82  " lpc linear predictive coding\n"
83  " cep cepstrum coding from lpc coefficients\n"
84  " melcep Mel scale cepstrum coding via fbank\n"
85  " fbank Mel scale log filterbank analysis\n"
86  " lsf line spectral frequencies\n"
87  " ref Linear prediction reflection coefficients\n"
88  " power\n"
89  " f0\n"
90  " energy: root mean square energy\n";
91 }
92 
93 
94 
95 int main(int argc, char *argv[])
96 {
97  EST_String out_file("-");
98  EST_StrList files;
99  EST_Option al;
100  EST_Features op;
101  EST_Wave sig;
102  EST_Track full;
103  EST_StrList coef_list, delta_list, acc_list, tlist, map;
104  EST_IList olist;
105 
107  (argc, argv,
108  EST_String("[input file] -o [output file]\n")+
109  "Summary: generate acoustic feature vectors for a waveform file \n"
110  "use \"-\" to make input and output files stdin/out \n"
111  "-h Options help \n\n" +
112  options_wave_input() +
113  options_track_output() + " \n"
114  "-shift <float> frame spacing in seconds for fixed frame analysis. This \n"
115  " doesn't have to be the same as the output file spacing - the \n"
116  " S option can be used to resample the track before saving \n"
117  " default: "+ftoString(DEFAULT_FRAME_SIZE) +"\n\n"
118  "-factor <float> Frames lengths will be FACTOR times the \n"
119  " local pitch period. \n"
120  " default: "+ftoString(DEFAULT_FRAME_FACTOR) +"\n\n"
121  "-pm <ifile> Pitch mark file name. This is used to \n"
122  " specify the positions of the analysis frames for pitch \n"
123  " synchronous analysis. Pitchmark files are just standard \n"
124  " track files, but the channel information is ignored and \n"
125  " only the time positions are used\n"
126  "-size <float> If specified with pm, size is used as the \n"
127  " fixed window size (times factor) rather than size within \n"
128  " each the pms.\n\n"
129 
130  "-coefs <string> list of basic types of processing required. \n"
131  " Permissable types are: \n" + sigpr_options_supported()+" \n"
132  "-delta <string> list of delta types of processing required. Basic \n"
133  " processing does not need to be specified for this option to work. \n"
134  " Permissable types are: \n" + sigpr_options_supported()+" \n"
135  "-acc <string> list of acceleration (delta delta) processing \n"
136  " required. Basic processing does not need to be specified for \n"
137  " this option to work. \n"
138  " Permissable types are: \n"
139  + sigpr_options_supported()+"\n"
140  "-window_type <string> Type of window used on waveform. \n"
141  " Permissable types are: \n" +
143  " default: " DEFAULT_WINDOW "\n\n"
144  "-lpc_order <int> Order of lpc analysis. \n\n"
145  "-ref_order <int> Order of lpc reflection coefficient analysis. \n\n"
146  "-cep_order <int> Order of lpc cepstral analysis.\n\n"
147  "-melcep_order <int> Order of Mel cepstral analysis.\n\n"
148  "-fbank_order <int> Order of filter bank analysis.\n\n"
149  "-preemph <float> Perform pre-emphasis with this factor.\n\n"
150  "-lifter <float> lifter coefficient.\n\n"
151  "-usepower use power rather than energy in filter bank \n"
152  " analysis\n\n"+
153  "-include_c0 include cepstral coefficient 0\n\n"
154  "-order <string> order of analyses\n", files, al);
155 
156  out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
157  set_options(op, al);
158 
159  StringtoStrList(al.val("-coefs"), coef_list);
160  StringtoStrList(al.val("-delta"), delta_list);
161  StringtoStrList(al.val("-acc"), acc_list);
162 
163  StringtoStrList(al.val("-order"), tlist);
164  StrListtoIList(tlist, olist);
165 
166  if (read_wave(sig, files.first(), al) != read_ok)
167  exit(-1);
168 
169  // allocate and fill time axis
170  if (al.present("-pm"))
171  {
172  if (read_track(full, al.val("-pm"), al))
173  exit(1);
174  }
175  else
176  {
177  full.resize((int)ceil(sig.end() / op.F("frame_shift")), 0);
178  full.fill_time(op.F("frame_shift"));
179  }
180 
181  // allocate channels
182  add_channels_to_map(map, coef_list, op, 0);
183  add_channels_to_map(map, delta_list, op, 1);
184  add_channels_to_map(map, acc_list, op, 2);
185 
186  //cerr << "MAP " << map << endl;
187 
188  full.resize(EST_CURRENT, map);
189 
190  if (al.present("-preemph"))
191  pre_emphasis(sig, al.fval("-preemph"));
192 
193  if(al.present("-usepower"))
194  cerr << "sig2fv: -usepower currently not supported" << endl;
195 
196  sigpr_base(sig, full, op, coef_list);
197  sigpr_delta(sig, full, op, delta_list);
198  sigpr_acc(sig, full, op, acc_list);
199 
200  if (al.present("-S"))
201  {
202  cout << "-S " << al.fval("-S") << endl;
203  full.sample(al.fval("-S"));
204  }
205 
206  if (full.save(out_file, al.val("-otype", 0)) != write_ok)
207  {
208  cerr << "sig2fv: failed to write output to \"" << out_file
209  << "\"" << endl;
210  exit(-1);
211  }
212  return 0;
213 }
214 
215 
216 
218  EST_Option &op)
219 {
220  EST_Litem *c, *o;
221  EST_String k;
222  int v;
223 
224  for (c = clist.head(), o = olist.head(); c && o; c= c->next(), o = o->next())
225  {
226  k = clist(c) + "_order";
227  v = olist(o);
228  op.override_ival(k, v);
229  }
230 }
231 
233 {
234  op.set("frame_shift", DEFAULT_FRAME_SIZE);
235  op.set("frame_factor", DEFAULT_FRAME_FACTOR);
236  op.set("window_type", DEFAULT_WINDOW);
237 
238  op.set("preemph", DEFAULT_PREEMPH);
239  op.set("lifter", DEFAULT_LIFTER);
240 
241  op.set("lpc_order", DEFAULT_LPC_ORDER);
242  op.set("ref_order", DEFAULT_REF_ORDER);
243  op.set("cep_order", DEFAULT_CEP_ORDER);
244  op.set("fbank_order", DEFAULT_FBANK_ORDER);
245  op.set("melcep_order", DEFAULT_MELCEP_ORDER);
246 
247  op.set("max_period", MAXIMUM_PITCH_PERIOD);
248  op.set("min_period", MINIMUM_PITCH_PERIOD);
249  op.set("def_period", DEFAULT_PITCH_PERIOD);
250 
251  if (al.present("-max_period"))
252  op.set("max_period", al.fval("-max_period", 0));
253  if (al.present("-min_period"))
254  op.set("min_period", al.fval("-min_period", 0));
255  if (al.present("-def_period"))
256  op.set("def_period", al.fval("-def_period", 0));
257 
258  if (al.present("-window_type"))
259  op.set("window_type", al.sval("-window_type", 1));
260 
261  if (al.present("-shift"))
262  op.set("frame_shift", al.fval("-shift", 1));
263  if (al.present("-factor"))
264  op.set("frame_factor", al.fval("-factor", 1));
265  if (al.present("-size"))
266  op.set("frame_factor", op.F("frame_factor")*-1.0*al.fval("-size"));
267  if (al.present("-length"))
268  op.set("frame_factor",
269  al.fval("-length", est_errors_allowed)/op.F("frame_shift",est_errors_allowed));
270 
271  if (al.present("-preemph"))
272  op.set("preemph", al.fval("-preemph", 1));
273  if (al.present("-lifter"))
274  op.set("lifter", al.fval("-lifter", 1));
275 
276  if (al.present("-lpc_order"))
277  op.set("lpc_order", al.ival("-lpc_order", 1));
278  if (al.present("-ref_order"))
279  op.set("ref_order", al.ival("-ref_order", 1));
280  if (al.present("-cep_order"))
281  op.set("cep_order", al.ival("-cep_order", 1));
282  if (al.present("-fbank_order"))
283  op.set("fbank_order", al.ival("-fbank_order", 1));
284  if (al.present("-melcep_order"))
285  op.set("melcep_order", al.ival("-melcep_order", 1));
286 
287  if (al.present("-usepower"))
288  op.set("usepower", al.val("-usepower", 1));
289 
290  if (al.present("-include_c0"))
291  op.set("include_c0", al.val("-include_c0", 1));
292 
293 }
294 
295 
A class for storing digital waveforms. The waveform is stored as an array of 16 bit shorts...
Definition: EST_Wave.h:64
#define MAXIMUM_PITCH_PERIOD
Definition: sig2fv_main.cc:67
void pre_emphasis(EST_Wave &sig, float a=DEFAULT_PRE_EMPH_FACTOR)
Definition: filter.cc:256
void add_channels_to_map(EST_StrList &map, EST_StrList &types, EST_Features &op, int order)
Definition: sigpr_utt.cc:76
The file was read in successfully.
#define DEFAULT_CEP_ORDER
Definition: sig2fv_main.cc:56
int override_ival(const EST_String rkey, const int rval)
add to end of list or overwrite. If rval is empty, do nothing
Definition: EST_Option.cc:72
int ival(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:82
int StrListtoIList(EST_StrList &s, EST_IList &il)
Convert a list of strings to a list of integers.
#define DEFAULT_LPC_ORDER
Definition: sig2fv_main.cc:54
#define DEFAULT_LIFTER
Definition: sig2fv_main.cc:61
const int EST_CURRENT
float fval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:104
static EST_String options_supported(void)
Return a paragraph describing the available windows.
Definition: EST_Window.cc:385
void set(const EST_String &name, int ival)
Definition: EST_Features.h:186
void resize(ssize_t num_frames, int num_channels, bool preserve=1)
Definition: EST_Track.cc:214
int main(int argc, char *argv[])
Definition: sig2fv_main.cc:95
#define DEFAULT_FBANK_ORDER
Definition: sig2fv_main.cc:57
EST_String ftoString(float n, int pres=3, int width=0, int l=0)
Make a EST_String object from an float, with variable precision.
Definition: util_io.cc:149
EST_String sigpr_options_supported(void)
Definition: sig2fv_main.cc:78
EST_UItem * next()
Definition: EST_UList.h:55
const EST_String & sval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:93
#define DEFAULT_MELCEP_ORDER
Definition: sig2fv_main.cc:58
#define DEFAULT_FRAME_SIZE
Definition: sig2fv_main.cc:52
void StringtoStrList(EST_String s, EST_StrList &l, EST_String sep)
Convert a EST_String to a EST_StrList by separating tokens in s delimited by the separator sep...
The file was written successfully.
void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op, const EST_StrList &slist)
Definition: sigpr_utt.cc:309
EST_write_status save(const EST_String name, const EST_String EST_filetype="")
Definition: EST_Track.cc:1233
#define DEFAULT_WINDOW
Definition: sig2fv_main.cc:59
void calculate_orders(EST_StrList &clist, EST_IList &olist, EST_Option &op)
Definition: sig2fv_main.cc:217
const T & first() const
return const reference to first item in list
Definition: EST_TList.h:152
void sample(float shift)
Definition: EST_Track.cc:674
EST_String options_wave_input(void)
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op, const EST_StrList &slist)
Definition: sigpr_utt.cc:302
float end()
return the time position of the last sample.
Definition: EST_Wave.h:153
int read_track(EST_Track &tr, const EST_String &in_file, EST_Option &al)
EST_String options_track_output(void)
#define MINIMUM_PITCH_PERIOD
Definition: sig2fv_main.cc:66
EST_read_status read_wave(EST_Wave &sig, const EST_String &in_file, EST_Option &al)
#define DEFAULT_PITCH_PERIOD
Definition: sig2fv_main.cc:68
int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
EST_UItem * head() const
Definition: EST_UList.h:97
#define DEFAULT_FRAME_FACTOR
Definition: sig2fv_main.cc:53
EST_String
#define DEFAULT_PREEMPH
Definition: sig2fv_main.cc:60
int parse_command_line(int argc, char *argv[], const EST_String &usage, EST_StrList &files, EST_Option &al, int make_stdio=1)
Definition: cmd_line.cc:101
void set_options(EST_Features &op, EST_Option &al)
Definition: sig2fv_main.cc:232
void fill_time(float t, int start=1)
Definition: EST_Track.cc:789
float F(const EST_String &path) const
Definition: EST_Features.h:136
void sigpr_base(EST_Wave &sig, EST_Track &fv, EST_Features &op, const EST_StrList &slist)
Definition: sigpr_utt.cc:138
#define DEFAULT_REF_ORDER
Definition: sig2fv_main.cc:55
Utility EST_String Functions header file.