Edinburgh Speech Tools  2.1-release
pitchmark.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author: Paul Taylor */
34 /* Date : December 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* Pitchmark Laryngograph Signals */
37 /* */
38 /*=======================================================================*/
39 
40 /* Note - this is based on a pitchmarker developed by Mike Macon and
41 written in matlab.
42 */
43 
44 #include "stdlib.h"
45 #include "sigpr/EST_filter.h"
46 #include "sigpr/EST_pitchmark.h"
48 #include "EST_math.h"
49 #include "EST_inline_utils.h"
50 #include "EST_wave_aux.h"
51 #include "EST_track_aux.h"
52 
53 using namespace std;
54 
55 void delta(EST_Wave &tr, EST_Wave &d, int regression_length);
56 
57 EST_Track pitchmark(EST_Wave &lx, int lx_lf, int lx_lo, int lx_hf,
58  int lx_ho, int df_lf, int df_lo, int mo, int debug)
59 {
60  EST_Track pm;
61  EST_Wave lxdiff;
62 
63  pm.set_equal_space(false);
64  // pre-filtering
65 
66  if (debug)
67  cout << "pitchmark 1\n";
68 
69  FIRlowpass_double_filter(lx, lx_lf, lx_lo);
70  FIRhighpass_double_filter(lx, lx_hf, lx_ho);
71 
72  if (debug)
73  cout << "pitchmark 2\n";
74 
75  if (debug)
76  lx.save("tmpfilt.lx");
77 
78 // cout << "df " << df_lf << " df_o " << df_lo << endl;
79 
80 // lxdiff = lx;
81 // differentiate(lxdiff);
82  lxdiff.resize(lx.num_samples());
83  lxdiff.set_sample_rate(lx.sample_rate());
84  delta(lx, lxdiff, 4);
85 
86  if (debug)
87  lxdiff.save("tmpdiff.lx");
88 
89  // it was found that median smoothing worked better here.
90 
91  if (df_lo > 0)
92  FIRlowpass_double_filter(lxdiff, df_lf, df_lo);
93 
94  if (mo > 0)
95  simple_mean_smooth(lxdiff, mo);
96 
97  if (debug)
98  lxdiff.save("tmpfiltdiff.lx");
99 
100  neg_zero_cross_pick(lxdiff, pm);
101 
102  return pm;
103 }
104 
106 {
107  EST_Track pm;
108  EST_Wave lxdiff;
109  int lx_lf, lx_lo, lx_hf, lx_ho, df_lf, df_lo, mo, debug;
110 
111  lx_lf = op.present("lx_low_frequency") ?
112  op.I("lx_low_frequency") : 400;
113  lx_lo = op.present("lx_low_order") ?
114  op.I("lx_low_order") : 19;
115 
116  lx_hf = op.present("lx_high_frequency") ?
117  op.I("lx_high_frequency") : 40;
118  lx_ho = op.present("lx_high_order") ?
119  op.I("lx_high_order") : 19;
120 
121  df_lf = op.present("df_low_frequency") ?
122  op.I("df_low_frequency") : 1000;
123  df_lo = op.present("df_low_order") ?
124  op.I("df_low_order") : 0;
125 
126  mo = op.present("median_order") ?
127  op.I("median_order") : 19;
128 
129  debug = op.present("pm_debug") ? 1 : 0;
130 
131  return pitchmark(lx, lx_lf, lx_lo, lx_hf, lx_ho, df_lf, df_lo,
132  mo, debug);
133 }
134 
135 /** Iterate through track and eliminate any frame whose distance to a
136 preceding frames is less than min seconds*/
137 
138 void pm_min_check(EST_Track &pm, float min)
139 {
140  int i, j;
141 
142  for (i = j = 0; i < pm.num_frames() - 1; ++i, ++j)
143  {
144  pm.t(j) = pm.t(i);
145  while ((i < (pm.num_frames() - 1)) && ((pm.t(i + 1) - pm.t(i)) < min))
146  ++i;
147  }
148  if (i < pm.num_frames())
149  pm.t(j) = pm.t(i);
150  pm.resize(j, pm.num_channels());
151 }
152 
153 
154 void pm_fill(EST_Track &pm, float new_end, float max, float min, float def)
155 {
156  EST_FVector new_pm;
157 
158  if (new_end < 0)
159  new_end = pm.end();
160 
161 // if (debug)
162  // cout<< "new end:" << new_end << endl;
163  // largest possible set of new pitchmarks
164 
165 // cout << "num frames:" << pm.num_frames() << endl;
166 // cout << "num frames:" << pm.end() << endl;
167 // cout << "num frames:" << min << endl;
168  new_pm.resize(int(new_end / min));
169 // cout << "num frames:" << pm.end()/min << endl;
170 // cout << "num frames:" << new_pm.n() << endl;
171 
172  int i, j, npm=0;
173  float last = 0.0;
174 
175  int dropped=0, added=0;
176 
177  for(j = 0; j < pm.num_frames(); j++)
178  {
179  float current = pm.t(j);
180 
181  if (current > new_end)
182  break;
183 
184  if (current - last < min)
185  {
186  // drop current pitchmark
187  dropped++;
188  }
189 
190  else if (current-last > max)
191  {
192  // interpolate
193  int num = ifloor((current - last)/ def);
194  float size = (current-last) / num;
195  for (i = 1; i <= num; i++)
196  {
197  new_pm[npm] = last + i * size;
198  npm++;
199  added++;
200  }
201  }
202  else
203  {
204  new_pm[npm] = pm.t(j);
205  npm++;
206  }
207  last=current;
208  }
209 
210  if (new_end - last > max)
211  {
212  // interpolate
213  int num = ifloor((new_end - last)/ def);
214  float size = (new_end -last) / num;
215  for (i = 1; i <= num; i++)
216  {
217  new_pm[npm] = last + i * size;
218  npm++;
219  added++;
220  }
221  }
222 
223 // if (debug)
224 // if (dropped>0 || added >0)
225 // cout << "Dropped " << dropped<< " and added " << added << " PMs\n";
226 
227 // if (debug)
228  pm.resize(npm, pm.num_channels());
229  for (i = 0; i < npm; i++)
230  pm.t(i) = new_pm(i);
231 }
232 
234 {
235  int i, j;
236  pm.resize(lx.num_samples(), EST_CURRENT);
237 
238  for (i = 1, j = 0; i < lx.num_samples(); ++i)
239  if ((lx.a(i -1) > 0) && (lx.a(i) <= 0))
240  pm.t(j++) = lx.t(i);
241 
242  pm.resize(j, EST_CURRENT);
243 
244  for (i = 0; i < pm.num_frames(); ++i)
245  pm.set_value(i);
246 }
247 
249 {
250  EST_Item *seg;
251  lab.clear();
252 
253  for (int i = 0; i < pm.num_frames(); ++i)
254  {
255  seg = lab.append();
256  seg->set("name","");
257  seg->set("end",pm.t(i));
258  }
259 }
260 
262 {
263  float prev_pm = 0.0;
264  f0 = pm;
265  f0.resize(EST_ALL, 1);
266 
267  for (ssize_t i = 0; i < f0.num_frames(); ++i)
268  {
269  f0.a(i, 0) = 1.0 / (f0.t(i) - prev_pm);
270  prev_pm = f0.t(i);
271  }
272 }
273 
274 void pm_to_f0(EST_Track &pm, EST_Track &fz, float shift)
275 {
276  ssize_t i;
277  float period;
278 
279  fz.resize((ssize_t)(pm.end()/shift), 1);
280  fz.fill_time(shift);
281 
282  for (i = 0; i < fz.num_frames() -1 ; ++i)
283  {
284  period = get_time_frame_size(pm, pm.index_below(fz.t(i)));
285  fz.a(i) = 1.0 /period;
286  }
287 }
A class for storing digital waveforms. The waveform is stored as an array of 16 bit shorts...
Definition: EST_Wave.h:64
void set_sample_rate(const int n)
Set sampling rate to n
Definition: EST_Wave.h:149
void pm_fill(EST_Track &pm, float new_end, float max, float min, float def)
Definition: pitchmark.cc:154
float end() const
return time of last value in track
Definition: EST_Track.cc:587
void simple_mean_smooth(EST_Track &c, ssize_t n, ssize_t channel=0)
A vector class for floating point numbers. EST_FVector x should be used instead of float *x wherever ...
Definition: EST_FMatrix.h:119
void set_value(ssize_t i)
set frame i to be a value
Definition: EST_Track.cc:133
void set(const EST_String &name, ssize_t ival)
Definition: EST_Item.h:185
int num_channels() const
return number of channels in track
Definition: EST_Track.h:657
void pm_to_label(EST_Track &pm, EST_Relation &lab)
Definition: pitchmark.cc:248
ssize_t index_below(float x) const
return the frame index before time t
Definition: EST_Track.cc:521
const int EST_CURRENT
void pm_min_check(EST_Track &pm, float min)
Definition: pitchmark.cc:138
ssize_t num_samples() const
return the number of samples in the waveform
Definition: EST_Wave.h:143
int ssize_t
const int EST_ALL
void resize(ssize_t num_frames, int num_channels, bool preserve=1)
Definition: EST_Track.cc:214
short & a(ssize_t i, ssize_t channel=0)
Definition: EST_Wave.cc:128
EST_write_status save(const EST_String filename, const EST_String EST_filetype="")
Definition: EST_Wave.cc:355
float max(float a, float b)
Definition: EST_cluster.cc:143
EST_Track pitchmark(EST_Wave &lx, int lx_lf, int lx_lo, int lx_hf, int lx_ho, int df_lf, int df_lo, int mo, int debug)
Definition: pitchmark.cc:57
float & t(ssize_t i=0)
return time position of frame i
Definition: EST_Track.h:478
float & a(ssize_t i, int c=0)
Definition: EST_Track.cc:1025
void pm_to_f0(EST_Track &pm, EST_Track &f0)
Definition: pitchmark.cc:261
void FIRlowpass_double_filter(EST_Wave &sigin, int freq, int order=DEFAULT_FILTER_ORDER)
Definition: filter.cc:549
float min(float a, float b)
Definition: EST_cluster.cc:138
void delta(EST_Wave &tr, EST_Wave &d, int regression_length)
Definition: delta.cc:92
int present(const EST_String &name) const
ssize_t num_frames() const
return number of frames in track
Definition: EST_Track.h:651
void resize(int num_samples, int num_channels=EST_ALL, int set=1)
resize the waveform
Definition: EST_Wave.h:184
int sample_rate() const
return the sampling rate (frequency)
Definition: EST_Wave.h:147
void FIRhighpass_double_filter(EST_Wave &sigin, int freq, int order=DEFAULT_FILTER_ORDER)
Definition: filter.cc:584
float t(ssize_t i) const
return the time position in seconds of the ith sample
Definition: EST_Wave.h:137
EST_Item * append(EST_Item *si)
Definition: EST_Relation.cc:88
int I(const EST_String &path) const
Definition: EST_Features.h:147
float get_time_frame_size(EST_Track &pms, int i, int prefer_prev=0)
Definition: sigpr_utt.cc:333
void set_equal_space(bool t)
Definition: EST_Track.h:675
void fill_time(float t, int start=1)
Definition: EST_Track.cc:789
void resize(int n, int set=1)
resize vector
void neg_zero_cross_pick(EST_Wave &lx, EST_Track &pm)
Definition: pitchmark.cc:233