Edinburgh Speech Tools  2.1-release
track_example.cc
Go to the documentation of this file.
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /*************************************************************************/
33  /* */
34  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35  /* Date: Fri May 9 1997 */
36  /* ------------------------------------------------------------------- */
37  /* Example of declaration and use of tracks. */
38  /* */
39  /*************************************************************************/
40 
41 
42 #include <iostream>
43 #include <cstdlib>
44 #include "EST_Track.h"
45 #include "EST_Wave.h"
46 #include "EST_sigpr.h"
47 #include "EST_error.h"
48 
49 using namespace std;
50 
51 int main(void) // Walk-through of EST_Track usage; the marked snippets below are quoted, in order, by the Doxygen page at the end of this file
52 
53 {
54  ssize_t i, j;
55 
56  /* This program is designed as an example not as something to run
57  so for testing purposes it simply exits */
58  exit(0); // everything after this call is unreachable at run time
59 
60  //@ code
61  EST_Track tr; // default track declaration
62  EST_Track tra(500, 10); // allocate track with 500 frames and 10 channels
63  //@ endcode
64 
65 
66  //@ code
67  tr.resize(10, 500); // resize track to have 10 frames and 500 channels
68  tr.resize(500, 10); // resize track to have 500 frames and 10 channels
69  //@ endcode
70 
71  /* by default, resizing preserves values in the track. This
72  may involve copying some information, so if the existing values
73  are not needed, a flag can be set which usually results in
74  quicker resizing
75  */
76  //@ code
77  tr.resize(250, 5, 0); // throw away any existing values
78  //@ endcode
79 
80  //@ code
81  tr.set_num_channels(10); // makes 10 channels, keeps same no of frames
82 
83  tr.set_num_frames(400); // makes 400 frames, keeps same no of channels
84  //@ endcode
85 
86 
87  //@ code
88  tr.resize(500, 10);
89 
90  for (i = 0; i < tr.num_frames(); ++i)
91  for (j = 0; j < tr.num_channels(); ++j)
92  tr.a(i, j) = -5.0;
93 
94  //@ endcode
95 
96  /** A well formed track will have a time value, specified in seconds,
97  for every frame. The time array can be filled directly:
98  */
99  //@ code
100  for (i = 0; i < tr.num_frames(); ++i)
101  tr.t(i) = (float) i * 0.01;
102  //@ endcode
103 
104 
105  //@ code
106  tr.fill_time(0.1); // NOTE(review): the page text says this matches the loop above, which uses a 0.01 step - confirm
107 
108  //@ endcode
109 
110  //@ code
111  for (i = 50; i < 100; ++i)
112  tr.set_break(i);
113  //@ endcode
114 
115  //@ code
116  for (i = 50; i < 100; ++i)
117  tr.set_value(i);
118  //@ endcode
119 
120  //@ code
121  if (tr.val(60))
122  cout << "Frame 60 is not a break\n";
123 
124  if (tr.track_break(60))
125  cout << "Frame 60 is a break\n";
126  //@ endcode
127 
128 
129  //@ code
130  tr.set_channel_name("F0", 0);
131  tr.set_channel_name("energy", 1);
132  //@ endcode
133 
134  //@ code
135  EST_StrList map;
136  map.append("F0");
137  map.append("energy");
138 
139  tr.resize(500, map); // this makes a 2 channel track and sets the names to F0 and energy
140  //@ endcode
141 
142  //@ code
143 
144  map.clear();
145  map.append("F0");
146  map.append("energy");
147 
148  map.append("cep_0");
149  map.append("cep_1");
150  map.append("cep_2");
151  map.append("cep_3");
152  map.append("cep_4");
153  map.append("cep_5");
154  map.append("cep_6");
155  map.append("cep_7");
156  map.append("cep_N");
157 
158  tr.resize(500, map); // makes an 11 channel track and sets the names
159  //@ endcode
160 
161 
162  //@ code
163  map.clear();
164  map.append("F0");
165  map.append("energy");
166 
167  map.append("$cep-0+8");
168 
169  tr.resize(500, map); // does exactly as above
170  //@ endcode
171 
172  //@}
173 
174 
175  /*Access single frames or single channels. */
176 
177  //@ code
178  EST_FVector tmp_frame;
179 
180  tr.frame(tmp_frame, 50);
181  //@ endcode
182 
183  //@ code
184  EST_FVector tmp_channel;
185 
186  tr.channel(tmp_channel, 5);
187  //@ endcode
188 
189  //@ code
190  tr.channel(tmp_channel, "energy");
191  //@ endcode
192 
193  //@ code
194  tr.frame(tmp_frame, 50, 2, 9);
195  //@ endcode
196 
197  //@ code
198  tr.channel(tmp_channel, 5, 400, 100);
199  //@ endcode
200 
201  //@ code
202  EST_Track sub;
203 
204  tr.sub_track(sub, 0, EST_ALL, 2, 9);
205 
206  //@ endcode
207 
208  //@ code
209  EST_Wave sig;
210 
211  melcep(sig, sub, 1.0, 20, 22);
212  //@ endcode
213 
214  //@ code
215 
216  tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N"); // NOTE(review): identical to the next snippet, but the page text describes a (first name, channel count) form here - confirm
217  //@ endcode
218 
219  //@ code
220  tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
221  //@ endcode
222 
223  //@ code
224  tr.sub_track(sub, 47, 39, "cep_0", "cep_N"); // NOTE(review): the page text talks about frames 43 to 86; 47 and 39 look like start frame and frame count - confirm
225  //@ endcode
226 
227 
228  //@ code
229  EST_Track::Entries frames;
230 
231  // print out the time of every 50th frame
232  cout << "Times:";
233 
234  for (frames.begin(tr); frames; ++frames)
235  {
236  const EST_Track &frame = *frames;
237  if (frames.n() % 50 ==0)
238  cout << " " << frames.n() << "[" << frame.t() << "]";
239  }
240  cout << "\n";
241 
242  //@ endcode
243 
244  //@ code
245  EST_Track tr_copy;
246 
247 // tr.copy_sub_track(tr_copy, 47, 39, "cep_0", "cep_N");
248  //@ endcode
249 
250 
251  //@ code
252  float *channel_buf, *frame_buf; // NOTE(review): both are allocated with new[] below and never delete[]'d in this example
253  channel_buf = new float[tr.num_frames()];
254  frame_buf = new float[tr.num_channels()];
255 
256  tr.copy_channel_out(5, channel_buf); // copy channel 5 into channel_buf
257  tr.copy_frame_out(43, frame_buf); // copy frame 43 into frame_buf
258  //@ endcode
259 
260 
261  //@ code
262  tr.copy_channel_in(5, channel_buf); // copy channel_buf into channel 5
263  tr.copy_frame_in(43, frame_buf); // copy frame_buf into frame 43
264  //@ endcode
265 
266  //@ code
267  EST_StrList aux_names;
268 
269  aux_names.append("voicing");
270  aux_names.append("join_points");
271  aux_names.append("cost");
272 
273  tr.resize_aux(aux_names);
274  //@ endcode
275 
276  //@ code
277 
278  for (i = 0; i < 500; ++i)
279  {
280  tr.aux(i, "voicing") = i;
281  tr.aux(i, "join_points") = EST_String("stuff");
282  tr.aux(i, "cost") = 0.111;
283  }
284  //@ endcode
285 
286  /* File I/O */
287 
288  //@ code
289  if (tr.save("tmp/track.htk", "htk") != write_ok)
290  EST_error("can't save htk file\n");
291  //@ endcode
292 
293  //@ code
294  if (tr.save("tmp/track.est", "est") != write_ok)
295  EST_error("can't save est file\n");
296  //@ endcode
297 
298  //@ code
299  if (tr.save("tmp/track.ascii", "ascii") != write_ok)
300  EST_error("can't save ascii file\n");
301  //@ endcode
302 
303  //@ code
304  EST_Track tr2;
305  if (tr2.load("tmp/track.htk") != read_ok)
306  EST_error("can't reload htk\n");
307  //@ endcode
308 
309  //@ code
310  if (tr.load("tmp/track.ascii", 0.01) != read_ok)
311  EST_error("can't reload ascii file\n");
312  //@ endcode
313 
314  exit(0);
315 }
316 
317 
318 
319 /** @page EST_Track-example EST_Track class example code
320  @tableofcontents
321  @brief Some examples of track manipulations.
322  @dontinclude track_example.cc
323 
324  @section initializing Initialising and Resizing a Track
325 
326  The constructor functions can be used to create a track with
327  zero frames and channels or a track with a specified number of
328  frames and channels
329 
330  @skipline //@ code
331  @until //@ endcode
332 
333 
334  tracks can be resized at any time:
335 
336  @skipline //@ code
337  @until //@ endcode
338 
339  by default, resizing preserves values in the track. This
340  may involve copying some information, so if the existing values
341  are not needed, a flag can be set which usually results in
342  quicker resizing
343 
344  @skipline //@ code
345  @until //@ endcode
346 
347  If only the number of channels or the number of frames needs
348  to be changed, this can be done with the following functions:
349  @skipline //@ code
350  @until //@ endcode
351 
352  The preserve flag works in the same way with these functions
353 
354  @section simple_access Simple Access
355 
356  Values in the track can be accessed and set by frame
357  number and channel number.
358 
359  The following resizes a track to have 500 frames and 10 channels
360  and fills every position with -5.
361 
362  @skipline //@ code
363  @until //@ endcode
364 
365  A well formed track will have a time value, specified in seconds,
366  for every frame. The time array can be filled directly:
367 
368  @skipline //@ code
369  @until //@ endcode
370 
371  which fills the time array with values 0.0, 0.01,
372  0.02 ... 4.99. However, a shortcut function is provided for fixed
373  frame spacing:
374  @skipline //@ code
375  @until //@ endcode
376 
377  which performs the same operation as above. Frames do not have
378  to be evenly spaced, in pitch synchronous processing the time
379  array holds the time position of each pitch period. In such
380  cases each position in the time array must obviously be set
381  individually.
382 
383  Some representations have undefined values during certain
384  sections of the track, for example the F0 value during
385  unvoiced speech.
386 
387  The break/value array can be used to specify if a frame has an
388  undefined value.
389 
390  If a frame in this array is 1, that means the amplitude is defined
391  at that point. If 0, the amplitude is undefined.
392  By default, every frame has a value.
393 
394  Breaks (undefined values) can be set by EST_Track::set_break().
395  The following sets every frame from 50 to 99 as a break:
396 
397  @skipline //@ code
398  @until //@ endcode
399 
400  frames can be turned back to values as follows:
401 
402  @skipline //@ code
403  @until //@ endcode
404 
405  It is up to individual functions to decide how to interpret breaks.
406 
407  A frame's status can be checked as follows:
408 
409  @skipline //@ code
410  @until //@ endcode
411 
412  @section est_trac_naming_channels Naming Channels
413 
414  While channels can be accessed by their index, it is often useful
415  to give them names and refer to them by those names.
416 
417  The EST_Track::set_channel_name() function sets the name of a
418  single channel:
419 
420  @skipline //@ code
421  @until //@ endcode
422 
423  An alternative is to use a predefined set of channel names
424  stored in a *map*. A track map
425  is simply a list of strings (an EST_StrList) which describe a channel name
426  configuration. The EST_Track::resize function can take
427  this and resize the number of channels to the number of channels
428  indicated in the map, and give each channel its name from the
429  map. For example:
430  @skipline //@ code
431  @until //@ endcode
432 
433  A convention is used for channels which comprise
434  components of a multi-dimensional analysis such as
435  cepstra. In such cases the channels are named
436  `TYPE_I`. The last coefficient is
437  always named `TYPE_N` regardless of
438  the number of coefficients. This is very useful in extracting
439  a set of related channels without needing to know the order
440  of the analysis.
441 
442  For example, a track map might look like:
443 
444  @skipline //@ code
445  @until //@ endcode
446 
447  This obviously gets unwieldy quite quickly, so the mapping
448  mechanism provides a short hand for multi-dimensional data.
449 
450  @skipline //@ code
451  @until //@ endcode
452 
453  Here \$ indicates the special status, "cep" the name of the
454  coefficients, "-0" that the first is number 0 and "+8" that
455  there are 8 more to follow.
456 
457 
458  @section tr_example_access_single_frames Access single frames or single channels.
459 
460  Often functions perform their operations on only a single
461  frame or channel, and the track class provides a general
462  mechanism for doing this.
463 
464  Single frames or channels can be accessed as EST_FVector :
465  Given a track with 500 frames and 10 channels, the 50th frame
466  can be accessed as:
467  @skipline //@ code
468  @until //@ endcode
469 
470  now `tmp_frame` is a 10 element vector, which is
471  a window into `tr`: any changes to the contents of `tmp_frame` will
472  change `tr`. `tmp_frame` cannot be resized. (This operation can
473  be thought of in standard C terms as `tmp_frame` being a pointer
474  to the 50th frame of `tr`).
475 
476 
477  Likewise with channels:
478 
479  @skipline //@ code
480  @until //@ endcode
481 
482  Again, `tmp_channel` is a 500 element vector, which is
483  a window into `tr`: any changes to the contents of `tmp_channel` will
484  change `tr`. `tmp_channel` cannot be resized.
485 
486 
487  Channels can also be extracted by name:
488 
489  @skipline //@ code
490  @until //@ endcode
491 
492  not all the channels need be put into the temporary frame.
493  Imagine we have a track with an F0 channel, an energy channel and
494  10 cepstrum channels. The following makes a frame from the
495  50th frame, which only includes the cepstral information in
496  channels 2 through 11
497 
498  @skipline //@ code
499  @until //@ endcode
500 
501  @skipline //@ code
502  @until //@ endcode
503 
504  @section tr_example_access_multiple_frames Access multiple frames or channels.
505 
506  In addition to extracting single frames and channels, multiple
507  frame and channel portions can be extracted in a similar
508  way. In the following example, we make a sub-track sub, which
509  points to the entire cepstrum portion of a track (channels 2
510  through 11)
511 
512  @skipline //@ code
513  @until //@ endcode
514 
515  Parameter `sub` behaves exactly like a normal
516  track in every way, except that it cannot be resized. Its
517  contents behave like a pointer into the designated portion of
518  `tr`, so changing `sub` will change `tr`.
519 
520  The first argument is the
521  `sub` track. The second states the start
522  frame and the total number of frames required. EST_ALL is a
523  special constant that specifies that all the frames are
524  required here. The next argument is the start channel number
525  (remember channels are numbered from 0), and the last argument
526  is the total number of channels required.
527 
528  This facility is particularly useful for using standard
529  signal processing functions efficiently. For example,
530  the \ref melcep in the signal processing library
531  takes a waveform and produces a mel-scale cepstrum. It determines
532  the order of the cepstral analysis by the number of channels in
533  the track it is given, which has already been allocated to have
534  the correct number of frames and channels.
535 
536  The following will process the waveform
537  `sig`, produce a 10th order mel cepstrum
538  and place the output in `sub`. (For
539  explanation of the other options see
540  \ref melcep)
541 
542  @skipline //@ code
543  @until //@ endcode
544 
545  because we have made `sub` a window
546  into `tr`, the melcep function writes its
547  output into the correct location, i.e. channels 2-11 of tr. If
548  it were not for the sub_track facility, either a separate track
549  of the right size would be passed into melcep and then it
550  would be copied into tr (wasteful), or else tr would be passed
551  in and other arguments would have to specify which channels
552  should be written to (messy).
553 
554  Sub-tracks can also be set using channel names. The
555  following example does exactly as above, but is referenced by
556  the name of the first channel required and the number of
557  channels to follow:
558 
559  @skipline //@ code
560  @until //@ endcode
561 
562  and this specifies the end by a string also:
563 
564  @skipline //@ code
565  @until //@ endcode
566 
567  sub_tracks can be any set of continuous frames and
568  channels. For example if a word started at frame 43 and ended
569  at frame 86, the following would set a sub track to that
570  portion:
571  @skipline //@ code
572  @until //@ endcode
573 
574  We can step through the frames of a Track using a standard
575  iterator. The frames are returned as one-frame sub-tracks.
576 
577  @skipline //@ code
578  @until //@ endcode
579 
580  The EST_Track::channel, EST_Track::frame
581  and EST_Track::sub_track functions are most commonly
582  used to write into a track using a convenient
583  sub-portion. Sometimes, however a simple copy is required
584  whose contents can be written without affecting the original.
585 
586  The EST_Track::copy_sub_track function does this
587  @skipline //@ code
588  @until //@ endcode
589 
590  Individual frames and channels can be copied out into
591  pre-allocated float * arrays as follows:
592 
593  @skipline //@ code
594  @until //@ endcode
595 
596  Individual frames and channels can be copied into the track
597  from float * arrays as follows:
598 
599  @skipline //@ code
600  @until //@ endcode
601 
602 
603 
604  @section auxiliary Auxiliary Channels
605  Auxiliary channels are used for storing frame information other than
606  amplitude coefficients, for example voicing decisions and points of
607  interest in the track.
608 
609  Auxiliary channels always have the same number of frames as the
610  amplitude channels. They are resized by assigning names to the
611  channels that need to be created:
612 
613  @skipline //@ code
614  @until //@ endcode
615 
616  The following fills in these three channels with some values:
617  @skipline //@ code
618  @until //@ endcode
619 
620 
621  @section file_i_o File I/O
622  Tracks in various formats can be saved and loaded:
623 
624  Save as a HTK file:
625 
626  @skipline //@ code
627  @until //@ endcode
628 
629  Save as a EST file:
630 
631  @skipline //@ code
632  @until //@ endcode
633 
634  Save as an ascii file:
635  @skipline //@ code
636  @until //@ endcode
637 
638  The file type is automatically determined from the file's
639  header during loading:
640 
641  @skipline //@ code
642  @until //@ endcode
643 
644  If no header is found, the function assumes the
645  file is ascii data, with a fixed frame shift, arranged with rows
646  representing frames and columns channels. In this case, the
647  frame shift must be specified as an argument to this function:
648  @skipline //@ code
649  @until //@ endcode
650 
651 */
652 
A class for storing digital waveforms. The waveform is stored as an array of 16 bit shorts...
Definition: EST_Wave.h:64
The file was read in successfully.
A vector class for floating point numbers. EST_FVector x should be used instead of float *x wherever ...
Definition: EST_FMatrix.h:119
void set_value(ssize_t i)
set frame i to be a value
Definition: EST_Track.cc:133
void set_channel_name(const EST_String &name, int channel)
set the name of the channel.
Definition: EST_Track.cc:168
void set_break(ssize_t i)
set frame i to be a break
Definition: EST_Track.cc:124
int num_channels() const
return number of channels in track
Definition: EST_Track.h:657
unsigned int n() const
Return the current position.
int ssize_t
void set_num_channels(int n, bool preserve=1)
Definition: EST_Track.h:170
const int EST_ALL
void resize(ssize_t num_frames, int num_channels, bool preserve=1)
Definition: EST_Track.cc:214
void melcep(EST_Wave &sig, EST_Track &mfcc_track, float factor, int fbank_order, float liftering_parameter, EST_WindowFunc *wf=EST_Window::creator(DEFAULT_WINDOW_NAME), const bool include_c0=false, const bool up=false)
Definition: sigpr_utt.cc:540
void sub_track(EST_Track &st, int start_frame=0, int nframes=EST_ALL, int start_chan=0, int nchans=EST_ALL)
Definition: EST_Track.cc:1100
int track_break(ssize_t i) const
return true if frame i is a break
Definition: EST_Track.h:634
EST_read_status load(const EST_String name, float ishift=0.0, float startt=0.0)
Definition: EST_Track.cc:1312
void set_num_frames(ssize_t n, bool preserve=1)
Definition: EST_Track.h:178
float & t(ssize_t i=0)
return time position of frame i
Definition: EST_Track.h:478
float & a(ssize_t i, int c=0)
Definition: EST_Track.cc:1025
EST_FMatrix sub(const EST_FMatrix &a, ssize_t row, ssize_t col)
Definition: vec_mat_aux.cc:187
The file was written successfully.
EST_write_status save(const EST_String name, const EST_String EST_filetype="")
Definition: EST_Track.cc:1233
void copy_channel_out(int n, float *buf, int offset=0, int num=EST_ALL) const
Definition: EST_Track.h:311
int main(void)
void resize_aux(EST_StrList &map, bool preserve=1)
Definition: EST_Track.cc:314
void channel(EST_FVector &cv, ssize_t n, int startf=0, int nf=EST_ALL)
Definition: EST_Track.h:215
#define EST_error
Definition: EST_error.h:104
ssize_t num_frames() const
return number of frames in track
Definition: EST_Track.h:651
void copy_frame_out(int n, float *buf, int offset=0, int num=EST_ALL) const
Definition: EST_Track.h:321
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
void copy_frame_in(int n, const float *buf, int offset=0, int num=EST_ALL)
Definition: EST_Track.h:346
void copy_channel_in(int n, const float *buf, int offset=0, int num=EST_ALL)
Definition: EST_Track.h:329
void begin(const Container &over)
Set the iterator ready to run over this container.
EST_Val & aux(ssize_t i, int c)
Definition: EST_Track.cc:428
int val(ssize_t i) const
return true if frame i is a value
Definition: EST_Track.cc:542
EST_String
void frame(EST_FVector &fv, int n, int startf=0, int nf=EST_ALL)
Definition: EST_Track.h:210
void fill_time(float t, int start=1)
Definition: EST_Track.cc:789
void clear(void)
remove all items in list
Definition: EST_TList.h:244