Edinburgh Speech Tools  2.1-release
sigpr_example.cc
Go to the documentation of this file.
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /************************************************************************/
33 /* */
34 /* Author: Paul Taylor (pault@cstr.ed.ac.uk) */
35 /* Date: Fri May 9 1997 */
36 /* ------------------------------------------------------------------- */
37 /* Examples of Generation of Acoustic Feature Vectors from Waveforms */
38 /* */
39 /************************************************************************/
40 
41 #include <cstdlib>
42 #include "EST_sigpr.h"
43 #include "EST_cmd_line.h"
44 #include "EST_inline_utils.h"
45 #include "EST_sigpr.h"
46 
47 using namespace std;
48 
49 /**@name Signal processing example code
50  *
51  * @toc
52  */
53 ///@{
54 
56 
57 void print_map(EST_TrackMap &t); // forward declaration only; no definition or call is visible in this listing
58 void print_track_map(EST_Track &t); // forward declaration only; no definition or call is visible in this listing
59 
60 int main(void)
61 
62 {
63  EST_StrList base_list; // decl
64  EST_StrList delta_list; // decl
65  EST_StrList acc_list; // decl
66  EST_Option op, al; // decl
67  init_lib_ops(al, op);
68  EST_Wave sig; // decl
69  EST_Track fv, part; // decl
70  float shift; // decl
71  int i;
72 
73 
74  cout << "position 1\n";
75 
76  /* Producing a single type of feature vector for an utterance */
77 
78  ///@ code
79 
80  int lpc_order = 16;
81  sig.load(DATA "/kdt_001.wav");
82 
83  ///@ endcode
84 
85  /* Now allocate enough space in the track to hold the analysis. */
86  ///@ code
87  int num_frames;
88  num_frames = (int)ceil(sig.end() / 0.01);
89  fv.resize(num_frames, lpc_order + 1);
90  ///@ endcode
91 
92  /* The positions of the frames, corresponding to the middle of their
93  analysis window also needs to be set. For fixed frame analysis, this
94  can be done with the fill_time() function: */
95 
96  ///@ code
97  fv.fill_time(0.01);
98  ///@ endcode
99 
100  /* The simplest way to do the actual analysis is as follows, which
101  will fill the track with the values from the LP analysis using the
102  default processing controls.
103  */
104 
105  ///@ code
106  sig2coef(sig, fv, "lpc");
107  ///@ endcode
108 
109  /* In this style of analysis, default values are used to control the
110  windowing mechanisms which split the whole signal into frames.
111 
112  Extending one time period before and one time period after the
113  current time mark:
114  */
115  ///@ code
116  sig2coef(sig, fv, "lpc", 2.0);
117  ///@ endcode
118 
119  /* Extending 1.5 time periods before and after the
120  current time mark, etc;
121  */
122  ///@ code
123  sig2coef(sig, fv, "lpc", 3.0);
124  ///@ endcode
125 
126  /* The type of windowing function may be changed also as this
127  can be passed in as an optional argument. First we
128  create a window function (This is explained more in \ref Windowing ).
129  */
130  ///@ code
131  EST_WindowFunc *wf = EST_Window::creator("hamming");
132  ///@ endcode
133  /* and then pass it in as the last argument
134  */
135  ///@ code
136  sig2coef(sig, fv, "lpc", 3.0, wf);
137  ///@ endcode
138  ///@}
139 
140  /* Pitch-Synchronous vs fixed frame analysis.
141 
142  There are many ways to fill the time array for fixed frame analysis.
143 
144  manually:
145 
146  */
147  ///@{
148 
149  ///@ code
150  num_frames = 300;
151  fv.resize(num_frames, lpc_order + 1);
152  shift = 0.01; // time interval in seconds
153 
154  for (i = 0; i < num_frames; ++i)
155  fv.t(i) = shift * (float) i;
156  ///@ endcode
157  /* or by use of the member function EST_Track::fill_time}
158  */
159 
160  ///@ code
161  fv.fill_time(0.01);
162  ///@ endcode
163 
164  /* Pitch synchronous values can simply be read from pitchmark
165  files:
166  */
167  ///@ code
168  fv.load(DATA "/kdt_001.pm");
169  make_track(fv, "lpc", lpc_order + 1);
170  ///@ endcode
171 
172  /* Regardless of how the time points where obtain, the analysis
173  function call is just the same:
174  */
175  ///@ code
176  sig2coef(sig, fv, "lpc");
177  ///@ endcode
178  ///@}
179 
180  cout << "position 3\n";
181 
182  /* Naming Channels */
183  ///@{
184  ///@ code
185 
186  int cep_order = 16;
187  EST_StrList map;
188 
189  map.append("$lpc-0+" Stringtoi(lpc_order));
190  map.append("$cepc-0+" Stringtoi(cep_order));
191  map.append("power");
192 
193  fv.resize(EST_CURRENT, map);
194  ///@ endcode
195 
196  /* An alternative is to use add_channels_to_map()
197  which takes a list of coefficient types and makes a map.
198  The order of each type of processing is extracted from
199  op.
200  */
201 
202  ///@ code
203 
204  EST_StrList coef_types;
205 
206  coef_types.append("lpc");
207  coef_types.append("cep");
208  coef_types.append("power");
209 
210  map.clear();
211 
212  add_channels_to_map(map, coef_types, op);
213  fv.resize(EST_CURRENT, map);
214 
215  ///@ endcode
216 
217  /* After allocating the right number of frames and channels
218  in fv, we extract a sub_track, which has all the frames
219  (i.e. between 0 and EST_ALL) and all the lpc channels
220  */
221  ///@ code
222  fv.sub_track(part, 0, EST_ALL, 0, "lpc_0", "lpc_N");
223  ///@ endcode
224  /* now call the signal processing function on this part: */
225  ///@ code
226  sig2coef(sig, part, "lpc");
227  ///@ endcode
228 
229  /* We repeat the procedure for the cepstral coefficients, but this
230  time take the next 8 channels (17-24 inclusive) and calculate the coefficients:
231  */
232  ///@ code
233  fv.sub_track(part, 0, EST_ALL, "cep_0", "cep_N");
234 
235  sig2coef(sig, part, "cep");
236  ///@ endcode
237  /* Extract the last channel for power and call the power function:
238  */
239  ///@ code
240  fv.sub_track(part, 0, EST_ALL, "power", 1);
241  power(sig, part, 0.01);
242 
243  ///@ endcode
244 
245  /* While the above technique is adequate for our needs and is
246  a useful demonstration of sub_track extraction, the
247  sigpr_base function is normally easier to use as it does
248  all the sub track extraction itself. To perform the lpc, cepstrum
249  and power analysis, we put these names into a StrList and
250  call sigpr_base.
251  */
252  ///@ code
253  base_list.clear(); // empty the list, just in case
254  base_list.append("lpc");
255  base_list.append("cep");
256  base_list.append("power");
257 
258  sigpr_base(sig, fv, op, base_list);
259  ///@ endcode
260  /* This will call sigpr_track as many times as is necessary.
261  */
262  ///@}
263 
264  /* Producing delta and acceleration coefficients */
265  ///@{
266  ///@ code
267 
268  map.append("$cep_d-0+" Stringtoi(cep_order)); // add deltas
269  map.append("$cep_a-0+" Stringtoi(cep_order)); // add accs
270 
271  fv.resize(EST_CURRENT, map); // resize the track.
272  ///@ endcode
273  /* Given a EST_Track of coefficients fv, the delta
274  function is used to produce the delta equivalents del.
275  The following uses the track allocated above and
276  generates a set of cepstral coefficients and then makes their
277  delta and acc:
278 
279  */
280  ///@ code
281 
282  EST_Track del, acc;
283 
284  fv.sub_track(part, 0, EST_ALL, 0, "cep_0", "cep_N"); // make subtrack of coefs
285  sig2coef(sig, part, "cep"); // fill with cepstra
286 
287  // make subtrack of deltas
288  fv.sub_track(del, 0, EST_ALL, 0, "cep_d_0", "cep_d_N");
289  delta(part, del); // calculate deltas of part, and place answer in del
290 
291  // make subtrack of accs
292  fv.sub_track(acc, 0, EST_ALL, 0, "cep_a_0", "cep_a_N");
293  delta(del, acc); // calculate deltas of del, and place answer in acc
294  ///@ endcode
295  /* It is possible to directly calculate the delta coefficients of
296  a type of coefficient, even if we don't have the base type.
297  \ref sigpr_delta will process the waveform, make a temporary
298  track of the required type "lpc" and calculate the delta of this.
299  </para><para>
300  The following makes a set of delta reflection coefficients:
301 
302  */
303  ///@ code
304  map.append("$ref_d-0+" Stringtoi(lpc_order)); // add to map
305  fv.resize(EST_CURRENT, map); // resize the track.
306 
307  sigpr_delta(sig, fv, op, "ref");
308  ///@ endcode
309  /* an equivalent function exists for acceleration coefficients:
310  */
311  ///@ code
312  map.append("$lsf_a-0+" Stringtoi(lpc_order)); // add acc lsf
313  fv.resize(EST_CURRENT, map); // resize the track.
314 
315  sigpr_acc(sig, fv, op, "ref");
316 
317  ///@ endcode
318  ///@}
319 
320  /* Windowing
321 
322  The \ref EST_Window class provides a variety of means to
323  divide speech into frames using windowing mechanisms.
324 
325  A window function can be created from a window name using the
326  \ref EST_Window::creator function:
327  */
328  ///@{
329  ///@ code
330 
331  EST_WindowFunc *hamm = EST_Window::creator("hamming");
332  EST_WindowFunc *rect = EST_Window::creator("rectangular");
333  ///@ endcode
334  /* This function can then be used to create a EST_TBuffer of
335  window values. In the following example the values from a
336  256 point hamming window are stored in the buffer win_vals:
337  */
338  ///@ code
339  EST_FVector frame;
340  EST_FVector win_vals;
341 
342  hamm(256, win_vals);
343  ///@ endcode
344 
345  /* The make_window function also creates a window:
346  */
347  ///@ code
348  EST_Window::make_window(win_vals, 256, "hamming",-1);
349  ///@ endcode
350 
351  /* this can then be used to make a frame of speech from the main EST_Wave
352  sig. The following example extracts speech starting at sample 1000:
353  */
354  ///@ code
355  for (i = 0; i < 256; ++i)
356  frame[i] = (float)sig.a(i + 1000) * win_vals[i];
357  ///@ endcode
358 
359  /* Alternatively, exactly the same operation can be performed in a
360  single step by passing the window function to the
361  \ref EST_Window::window_signal function which takes a
362  \ref EST_Wave and performs windowing on a section of it,
363  storing the output in the \ref EST_FVector {\tt frame}.
364  */
365  ///@ code
366  EST_Window::window_signal(sig, hamm, 1000, 256, frame, 1);
367  ///@ endcode
368  /* The window function need not be explicitly created, the window
369  signal can work on just the name of the window type:
370  */
371 
372  ///@ code
373  EST_Window::window_signal(sig, "hamming", 1000, 256, frame, 1);
374  ///@ endcode
375 
376  ///@}
377  /* Frame based signal processing
378  The signal processing library provides an extensive set of functions
379  which operate on a single frame of coefficients.
380  The following example shows one method of splitting the signal
381  into frames and calling a signal processing algorithm.
382 
383  First set up the track for 16 order LP analysis:
384 
385  */
386  ///@{
387  ///@ code
388 
389  map.clear();
390  map.append("$lpc-0+16");
391 
392  fv.resize(EST_CURRENT, map);
393 
394  ///@ endcode
395  /* In this example, we take the analysis frame length to be 256 samples
396  long, and the shift in samples is just the shift in seconds times the
397  sampling frequency.
398  */
399  ///@ code
400  int s_length = 256;
401  int s_shift = int(shift * float(sig.sample_rate()));
402  EST_FVector coefs;
403  ///@ endcode
404 
405  /* Now we set up a loop which calculates the frames one at a time.
406  */
407  ///@ code
408  for (int k1 = 0; k1 < fv.num_frames(); ++k1)
409  {
410  int start = (k1 * s_shift) - (s_length/2);
411  EST_Window::window_signal(sig, "hamming", start, s_length, frame, 1);
412 
413  fv.frame(coefs, k1); // Extract a single frame
414  sig2lpc(frame, coefs); // Pass this to actual algorithm
415  }
416  ///@ endcode
417 
418  /* A slightly different tack can be taken for pitch-synchronous analysis.
419  Setting up fv with the pitchmarks and channels:
420  */
421  ///@ code
422  fv.load(DATA "/kd1_001.pm");
423  fv.resize(EST_CURRENT, map);
424  ///@ endcode
425  /* Set up as before, but this time calculate the window starts and
426  lengths from the time points. In this example, the length is a
427  {\tt factor} (twice) the local frame shift.
428  Note that the only difference between this function and the fixed
429  frame one is in the calculation of the start and end points - the
430 
431  windowing, frame extraction and call to \ref sig2lpc are exactly
432  the same.
433  */
434  ///@ code
435  float factor = 2.0;
436 
437  for (int k2 = 0; k2 < fv.num_frames(); ++k2)
438  {
439  s_length = irint(get_frame_size(fv, k2, sig.sample_rate())* factor);
440  int start = (irint(fv.t(k2) * sig.sample_rate()) - (s_length/2));
441 
442  EST_Window::window_signal(sig, wf, start, s_length, frame, 1);
443 
444  fv.frame(coefs, k2);
445  sig2lpc(frame, coefs);
446  }
447  ///@ endcode
448  ///@}
449 
450  /* Filtering */
451  ///@{
452  ///@ code
453 
454  EST_FVector filter;
455  int freq = 400;
456  int filter_order = 99;
457 
458  filter = design_lowpass_FIR_filter(sig.sample_rate(), 400, 99);
459  ///@ endcode
460  /* And now use this filter on the signal:
461  */
462  ///@ code
463  FIRfilter(sig, filter);
464  ///@ endcode
465  /* For one-off filtering operations, the filter design can be
466  done in the filter function itself. The \ref FIRlowpass_filter
467  function takes the signal, cut-off frequency and order as
468  arguments and designs the filter on the fly. Because of the
469  overhead of filter design, this function is expensive and
470  should only be used for one-off operations.
471  */
472  ///@ code
473  FIRlowpass_filter(sig, 400, 99);
474  ///@ endcode
475  /* The equivalent operations exist for high-pass filtering:
476  */
477  ///@ code
478  filter = design_highpass_FIR_filter(sig.sample_rate(), 50, 99);
479  FIRfilter(sig, filter);
480  FIRhighpass_filter(sig, 50, 99);
481  ///@ endcode
482  /* Filters of arbitrary frequency response can also be designed using
483  the \ref design_FIR_filter function.
484  */
485  ///@ code
486  EST_FVector response(16);
487  response[0] = 1;
488  response[1] = 1;
489  response[2] = 1;
490  response[3] = 1;
491  response[4] = 0;
492  response[5] = 0;
493  response[6] = 0;
494  response[7] = 0;
495  response[8] = 1;
496  response[9] = 1;
497  response[10] = 1;
498  response[11] = 1;
499  response[12] = 0;
500  response[13] = 0;
501  response[14] = 0;
502  response[15] = 0;
503 
504  filter = design_FIR_filter(response, 15);
505 
506  FIRfilter(sig, response);
507  ///@ endcode
508  /*The normal filtering functions can cause a time delay in the
509  filtered waveform. To attempt to eliminate this, a set of
510  double filter function functions are provided which guarantees
511  zero phase differences between the original and filtered waveform.
512  */
513  ///@ code
514  FIRlowpass_double_filter(sig, 400);
515  FIRhighpass_double_filter(sig, 40);
516  ///@ endcode
517 
518  /* Sometimes it is undesirable to have the input signal overwritten.
519  For these cases, a set of parallel functions exist which take
520  a input waveform for reading and a output waveform for writing to.
521  */
522  ///@ code
523  EST_Wave sig_out;
524 
525  FIRfilter(sig, sig_out, response);
526  FIRlowpass_filter(sig, sig_out, 400);
527  FIRhighpass_filter(sig, sig_out, 40);
528  ///@ endcode
529  ///@}
530 
531 }
532 
533 ///@}
534 
535 
536 /**@page sigpr-example Example of Signal Processing code
537  @brief Signal processing examples
538  @dontinclude sigpr_example.cc
539 
540 @tableofcontents
541 @section producing-feature-vector-for-utt Producing a single type of feature vector for an utterance
542 
543 A number of types of signal processing can be performed by the
544 \ref sig2coef function. The following code demonstrates a simple
545 case of calculating the linear prediction (LP) coefficients for
546 a waveform.
547 
548 First set the order of the lpc analysis to 16 (this entails 17 actual
549 coefficients) and then load in the waveform to be analysed.
550 
551  @skipline //@ code
552  @until //@ endcode
553 
554 Now allocate enough space in the track to hold the analysis.
555 The following command resizes `fv` to have enough frames for
556 analysis frames at 0.01 intervals up to the end of the waveform,
557 (sig.end()), and enough channels to store `lpc_order + 1` coefficients.
558 The channels are named so as to take lpc coefficients.
559 
560  @skipline //@ code
561  @until //@ endcode
562 
563 The positions of the frames, corresponding to the middle of their
564 analysis window also need to be set. For fixed frame analysis, this
565 can be done with the EST_Track::fill_time() function:
566 
567  @skipline //@ code
568  @until //@ endcode
569 
570 The simplest way to do the actual analysis is as follows, which
571 will fill the track with the values from the LP analysis using the
572 default processing controls.
573 
574  @skipline //@ code
575  @until //@ endcode
576 
577 In this style of analysis, default values are used to control the
578 windowing mechanisms which split the whole signal into frames.
579 
580 Specifically, each frame is defined to start a certain distance
581 before the time interval, and extending the same distance after.
582 This distance is calculated as a function of the local window
583 spacing and can be adjusted as follows:
584 
585 Extending one time period before and one time period after the
586 current time mark:
587 
588  @skipline //@ code
589  @until //@ endcode
590 
591 Extending 1.5 time periods before and after the
592 current time mark, etc;
593 
594  @skipline //@ code
595  @until //@ endcode
596 
597 The type of windowing function may be changed also as this
598 can be passed in as an optional argument. First we create a window
599 function (This is explained more in \ref Windowing).
600 
601  @skipline //@ code
602  @until //@ endcode
603  and then pass it in as the last argument
604 
605  @skipline //@ code
606  @until //@ endcode
607 
608 
609  @section pitchvsfixframe Pitch-Synchronous vs fixed frame analysis.
610 
611  Most of the core signal processing functions operate on individual
612  frames of speech and are oblivious as to how these frames were
613  extracted from the original speech. This allows us to take the frames
614  from anywhere in the signal: specifically, this facilitates two
615  common forms of analysis:
616 
617  - **fixed frame**: The time points are spaced at even intervals
618  throughout the signal.
619  - **pitch-synchronous**: The time points represent *pitchmarks*
620  and correspond to a specific position in each pitch period,
621  e.g. the instant of glottal closure.
622 
623  It is a simple matter to fill the time array, but normally
624  pitchmarks are read from a file or taken from another signal
625  processing algorithm (see \ref "Pitchmark functions").
626 
627  There are many ways to fill the time array for fixed frame analysis.
628 
629  manually:
630 
631  @skipline //@ code
632  @until //@ endcode
633 
634  or by use of the member function \ref EST_Track::fill_time
635 
636  @skipline //@ code
637  @until //@ endcode
638 
639  Pitch synchronous values can simply be read from pitchmark
640  files:
641 
642  @skipline //@ code
643  @until //@ endcode
644 
645  Regardless of how the time points were obtained, the analysis
646  function call is just the same:
647 
648  @skipline //@ code
649  @until //@ endcode
650 
651  @section sigpr-example-naming-channels Naming Channels
652 
653  Multiple types of feature vector can be stored in the same Track.
654  Imagine that we want lpc, cepstrum and power
655  coefficients in that order in a track. This can be achieved by using
656  the \ref sig2coef function multiple times, or by the wrap
657  around \ref sigpr_base function.
658 
659  It is vitally important here to ensure that before passing the
660  track to the signal processing functions that it has the correct
661  number of channels and that these are appropriately named. This is
662  most easily done using the track map facility, explained
663  in \ref est_trac_naming_channels.
664 
665  For each call, we only use the part of the track that is relevant.
666  The EST_Track::sub_track member function is used to get
667  this. In the following example, we are assuming here that
668  `fv` has sufficient space for 17 lpc coefficients (order 16), the cepstrum
668  `fv` has sufficient space for 17 lpc coefficients, 8 cepstrum
669  coefficients and power and that they are stored in that order.
670 
671  @skipline //@ code
672  @until //@ endcode
673 
674  An alternative is to use \ref add_channels_to_map()
675  which takes a list of coefficient types and makes a map.
676  The order of each type of processing is extracted from op.
677 
678  @skipline //@ code
679  @until //@ endcode
680 
681  After allocating the right number of frames and channels
682  in `fv`, we extract a sub_track, which has all the frames
683  (i.e. between 0 and EST_ALL) and all the lpc channels.
684 
685  @skipline //@ code
686  @until //@ endcode
687 
688  now call the signal processing function on this part:
689 
690  @skipline //@ code
691  @until //@ endcode
692 
693  We repeat the procedure for the cepstral coefficients, but this
694  time take the cepstral channels (`cep_0` to `cep_N`) and calculate the coefficients:
695 
696  @skipline //@ code
697  @until //@ endcode
698 
699  Extract the last channel for power and call the power function:
700 
701  @skipline //@ code
702  @until //@ endcode
703 
704  While the above technique is adequate for our needs and is
705  a useful demonstration of sub_track extraction, the
706  \ref sigpr_base function is normally easier to use as it does
707  all the sub track extraction itself. To perform the lpc, cepstrum
708  and power analysis, we put these names into a EST_StrList and
709  call \ref sigpr_base.
710 
711  @skipline //@ code
712  @until //@ endcode
713 
714  This will call \ref sigpr_track as many times as is necessary.
715 
716  @section sigpr-deltaacc Producing delta and acceleration coefficients
717 
718  Delta coefficients represent the numerical differentiation of a
719  track, and acceleration coefficients represent the second
720  order numerical differentiation.
721 
722  By convention, delta coefficients have a "_d" suffix and acceleration
723  coefficients "_a". If the coefficient is multi-dimensional, the
724  numbers go after the "_d" or "_a".
725 
726  @skipline //@ code
727  @until //@ endcode
728 
729  Given a EST_Track of coefficients `fv`, the \ref EST_Track::delta
730  function is used to produce the delta equivalents `del`.
731  The following uses the track allocated above and
732  generates a set of cepstral coefficients and then makes their
733  delta and acc:
734 
735  @skipline //@ code
736  @until //@ endcode
737 
738  It is possible to directly calculate the delta coefficients of
739  a type of coefficient, even if we don't have the base type.
740  \ref sigpr_delta will process the waveform, make a temporary
741  track of the required type "lpc" and calculate the delta of this.
742 
743  The following makes a set of delta reflection coefficients:
744 
745  @skipline //@ code
746  @until //@ endcode
747 
748  an equivalent function exists for acceleration coefficients:
749 
750  @skipline //@ code
751  @until //@ endcode
752 
753  @section sigpr-windowing Windowing
754 
755  The \ref EST_Window class provides a variety of means to
756  divide speech into frames using windowing mechanisms.
757 
758  A window function can be created from a window name using the
759  EST_Window::creator function:
760 
761  @skipline //@ code
762  @until //@ endcode
763 
764  This function can then be used to create a EST_TBuffer of
765  window values. In the following example the values from a
766  256 point hamming window are stored in the buffer `win_vals`:
767 
768  @skipline //@ code
769  @until //@ endcode
770 
771  The make_window function also creates a window:
772 
773  @skipline //@ code
774  @until //@ endcode
775 
776  this can then be used to make a frame of speech from the main EST_Wave
777  `sig`. The following example extracts speech starting at sample 1000:
778 
779  @skipline //@ code
780  @until //@ endcode
781 
782  Alternatively, exactly the same operation can be performed in a
783  single step by passing the window function to the
784  EST_Window::window_signal function which takes a
785  EST_Wave and performs windowing on a section of it,
786  storing the output in the EST_FVector `frame`.
787 
788  @skipline //@ code
789  @until //@ endcode
790 
791  The window function need not be explicitly created, the window
792  signal can work on just the name of the window type:
793 
794  @skipline //@ code
795  @until //@ endcode
796 
797  @section sigpr-example-frames Frame based signal processing
798 
799  The signal processing library provides an extensive set of functions
800  which operate on a single frame of coefficients.
801  The following example shows one method of splitting the signal
802  into frames and calling a signal processing algorithm.
803 
804  First set up the track for 16 order LP analysis:
805 
806  @skipline //@ code
807  @until //@ endcode
808 
809  In this example, we take the analysis frame length to be 256 samples
810  long, and the shift in samples is just the shift in seconds times the
811  sampling frequency.
812 
813  @skipline //@ code
814  @until //@ endcode
815 
816  Now we set up a loop which calculates the frames one at a time.
817  `start` is the start position in samples of each frame.
818  The EST_Window::window_signal function is called which
819  makes a EST_FVector frame of the speech via a hamming window.
820 
821  Using the EST_Track::frame function, the EST_FVector
822  `coefs` is set to frame `k` in the track. It is important
823  to understand that this operation involves setting an internal
824  smart pointer in `coefs` to the memory of frame `k`. This
825  allows the signal processing function \ref sig2lpc to operate
826  on an input and output EST_FVector, without any copying to or
827  from the main track. After the \ref sig2lpc call, the kth frame
828  of `fv` is now filled with the LP coefficients.
829 
830  @skipline //@ code
831  @until //@ endcode
832 
833  A slightly different tack can be taken for pitch-synchronous analysis.
834  Setting up fv with the pitchmarks and channels:
835 
836  @skipline //@ code
837  @until //@ endcode
838 
839  Set up as before, but this time calculate the window starts and
840  lengths from the time points. In this example, the length is a
841  `factor` (twice) the local frame shift.
842  Note that the only difference between this function and the fixed
843  frame one is in the calculation of the start and end points - the
844  windowing, frame extraction and call to \ref sig2lpc are exactly
845  the same.
846 
847 
848  @skipline //@ code
849  @until //@ endcode
850 
851  @section sigpr-filtering Filtering
852 
853  In the EST library we so far have two main types of filter,
854  **finite impulse response (FIR)** filters and **linear prediction (LP)**
855  filters. **infinite impulse response (IIR)** filters are not yet
856  implemented, though LP filters are a special case of these.
857 
858  Filtering involves 2 stages: the design of the filter and the
859  use of this filter on the waveform.
860 
861  First we examine a simple low-pass filter which attempts to suppress
862  all frequencies above a cut-off. Imagine we want to low pass filter
863  a signal at 400Hz. First we design the filter:
864 
865  @skipline //@ code
866  @until //@ endcode
867 
868  And now use this filter on the signal:
869 
870  @skipline //@ code
871  @until //@ endcode
872 
873  For one-off filtering operations, the filter design can be
874  done in the filter function itself. The \ref FIRlowpass_filter
875  function takes the signal, cut-off frequency and order as
876  arguments and designs the filter on the fly. Because of the
877  overhead of filter design, this function is expensive and
878  should only be used for one-off operations.
879 
880  @skipline //@ code
881  @until //@ endcode
882 
883  The equivalent operations exist for high-pass filtering:
884 
885  @skipline //@ code
886  @until //@ endcode
887 
888  Filters of arbitrary frequency response can also be designed using
889  the \ref design_FIR_filter function. This function takes a
890  EST_FVector of order \f$2^{N}\f$ which specifies the desired frequency
891  response up to 1/2 the sampling frequency. The function returns
892  a set of filter coefficients that attempt to match the desired
893  response.
894 
895  @skipline //@ code
896  @until //@ endcode
897 
898  The normal filtering functions can cause a time delay in the
899  filtered waveform. To attempt to eliminate this, a set of
900  double filter functions is provided which guarantee
901  zero phase differences between the original and filtered waveform.
902 
903  @skipline //@ code
904  @until //@ endcode
905 
906  Sometimes it is undesirable to have the input signal overwritten.
907  For these cases, a set of parallel functions exist which take
908  an input waveform for reading and an output waveform for writing to.
909 
910  @skipline //@ code
911  @until //@ endcode
912 
913 */
A class for storing digital waveforms. The waveform is stored as an array of 16 bit shorts...
Definition: EST_Wave.h:64
EST_FVector design_FIR_filter(const EST_FVector &freq_response, int filter_order)
Definition: filter.cc:418
void FIRfilter(EST_Wave &in_sig, const EST_FVector &numerator, int delay_correction=0)
Definition: filter.cc:336
static Func * creator(const char *name, bool report_error=false)
Return the creation function for the given window type.
Definition: EST_Window.cc:218
void delta(EST_Track &tr, EST_Track &d, int regression_length=3)
Definition: delta.cc:52
void add_channels_to_map(EST_StrList &map, EST_StrList &types, EST_Features &op, int order)
Definition: sigpr_utt.cc:76
void print_track_map(EST_Track &t)
static void window_signal(const EST_Wave &sig, EST_WindowFunc *make_window, int start, int size, EST_TBuffer< float > &frame)
Definition: EST_Window.cc:279
EST_FVector design_lowpass_FIR_filter(int sample_rate, int freq, int order)
Definition: filter.cc:506
A vector class for floating point numbers. EST_FVector x should be used instead of float *x wherever ...
Definition: EST_FMatrix.h:119
void FIRlowpass_filter(EST_Wave &sigin, int freq, int order=DEFAULT_FILTER_ORDER)
Definition: filter.cc:526
void power(EST_Wave &sig, EST_Track &a, float factor)
Definition: sigpr_utt.cc:422
const int EST_CURRENT
static void make_window(EST_TBuffer< float > &window_vals, int size, const char *name, int window_centre)
Definition: EST_Window.cc:259
const int EST_ALL
void resize(ssize_t num_frames, int num_channels, bool preserve=1)
Definition: EST_Track.cc:214
short & a(ssize_t i, ssize_t channel=0)
Definition: EST_Wave.cc:128
void sig2coef(EST_Wave &sig, EST_Track &a, EST_String type, float factor=2.0, EST_WindowFunc *wf=EST_Window::creator(DEFAULT_WINDOW_NAME))
Definition: sigpr_utt.cc:399
void sub_track(EST_Track &st, int start_frame=0, int nframes=EST_ALL, int start_chan=0, int nchans=EST_ALL)
Definition: EST_Track.cc:1100
EST_read_status load(const EST_String name, float ishift=0.0, float startt=0.0)
Definition: EST_Track.cc:1312
int get_frame_size(EST_Track &pms, int current_pos, int sample_rate, int prefer_prev=0)
Definition: sigpr_utt.cc:317
float & t(ssize_t i=0)
return time position of frame i
Definition: EST_Track.h:478
void FIRlowpass_double_filter(EST_Wave &sigin, int freq, int order=DEFAULT_FILTER_ORDER)
Definition: filter.cc:549
int Stringtoi(EST_String s)
Make an int from a EST_String. EST_String equivalent of atoi()
Definition: util_io.cc:131
int init_lib_ops(EST_Option &al, EST_Option &options)
Definition: cmd_line.cc:69
void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op, const EST_StrList &slist)
Definition: sigpr_utt.cc:309
int main(void)
ssize_t num_frames() const
return number of frames in track
Definition: EST_Track.h:651
void EST_WindowFunc(int size, EST_TBuffer< float > &r_window, int window_centre)
Function which creates a window.
Definition: EST_Window.h:52
void FIRhighpass_filter(EST_Wave &in_sig, int freq, int order)
Definition: filter.cc:534
getString int
Definition: EST_item_aux.cc:50
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op, const EST_StrList &slist)
Definition: sigpr_utt.cc:302
float end()
return the time position of the last sample.
Definition: EST_Wave.h:153
int sample_rate() const
return the sampling rate (frequency)
Definition: EST_Wave.h:147
float start(const EST_Item &item)
Definition: EST_item_aux.cc:52
void FIRhighpass_double_filter(EST_Wave &sigin, int freq, int order=DEFAULT_FILTER_ORDER)
Definition: filter.cc:584
void print_map(EST_TrackMap &t)
EST_StrList empty
void frame(EST_FVector &fv, int n, int startf=0, int nf=EST_ALL)
Definition: EST_Track.h:210
void fill_time(float t, int start=1)
Definition: EST_Track.cc:789
void clear(void)
remove all items in list
Definition: EST_TList.h:244
EST_read_status load(const EST_String filename, int offset=0, ssize_t length=0, int rate=default_sample_rate)
Definition: EST_Wave.cc:180
void sig2lpc(const EST_FVector &sig, EST_FVector &acf, EST_FVector &ref, EST_FVector &lpc)
Definition: sigpr_frame.cc:406
void sigpr_base(EST_Wave &sig, EST_Track &fv, EST_Features &op, const EST_StrList &slist)
Definition: sigpr_utt.cc:138
EST_FVector design_highpass_FIR_filter(int sample_rate, int freq, int order)
Definition: filter.cc:512