Edinburgh Speech Tools  2.1-release
srpd1.3.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Bagshaw */
34 /* Date : 1993 */
35 /*************************************************************************/
36 /* */
37 /* The above copyright was given by Paul Bagshaw, he retains */
38 /* his original rights */
39 /* */
40 /*************************************************************************/
41  /****************************************************************************
42  * *
43  * Pitch Determination Algorithm. *
44  * *
45  * Super Resolution Pitch Determinator with No Headers (SRPD_HD). *
46  * *
47  * Analysis synchronised with cepstral analysis, pitch biasing option, and *
48  * optimised for minimum gross pitch errors and accurate voiced/unvoiced *
49  * classification. All known bugs resolved! *
50  * *
51  * 4th February 1992: *
52  * Additional option [-w] added to give an artificial frame length, thus *
53  * allowing the output data to be synchronised with other signal processing *
54  * algorithms such as cepstral analysis and formant tracking. *
55  * *
56  * Y. Medan, E. Yair, and D. Chazan, "Super resolution pitch determination *
57  * of speech signals," IEEE Trans. Signal Processing Vol.39 No.1 *
58  * pp.40-48 (1991). *
59  * *
60  * Implementation by Paul Bagshaw, Centre for Speech Technology Research, *
61  * University of Edinburgh, 80 South Bridge, Edinburgh EH1 1HN. *
62  * *
63  *****************************************************************************/
64 
65 /************************
66  * include header files *
67  ************************/
68 
69 #include <cmath>
70 #include <cstdlib>
71 #include <iostream>
72 #include "srpd.h"
73 #include "EST_cutils.h"
74 #include "EST_Wave.h"
75 #include "EST_File.h"
76 
77 
78 using namespace std;
79 
80 #ifndef MAXSHORT
81 #define MAXSHORT 32767
82 #endif
83 
/*
 * super_resolution_pda: classify one analysis segment as silent, unvoiced
 * or voiced and, when voiced, estimate its pitch frequency.
 *
 * paras    - analysis settings read here: Nmin/Nmax (shortest/longest
 *            candidate period in samples), L (decimation step), Tsilent,
 *            Thigh, Tmin, Tmax_ratio, Tdh, peak_tracking and sample_freq.
 * seg      - segment descriptor passed by value; only seg.data is read.
 *            Indices up to roughly 3 * Nmax are touched (presumably the
 *            buffer allocated by initialise_structures -- confirm at caller).
 * p_cc     - correlation coefficient track; coeff[n - Nmin] is written for
 *            every candidate period n that is evaluated.
 * p_status - in/out.  The previous v_uv, cc_max, pitch_freq and s_h are read
 *            to choose the threshold and the bias; threshold, pitch_freq,
 *            v_uv, s_h and cc_max are updated before returning.
 *
 * Outcomes:
 *   SILENT   - low amplitude; all coefficients zeroed, pitch_freq set to
 *              BREAK_NUMBER.
 *   UNVOICED - no significant peak found; pitch_freq set to BREAK_NUMBER.
 *   VOICED   - pitch_freq = sample_freq / (N_ + beta).
 *
 * The function keeps static state between calls (zx_lft_N, zx_rht_N and
 * prev_pf), so its result depends on the sequence of earlier calls.
 */
84 void super_resolution_pda (struct Srpd_Op *paras, SEGMENT_ seg,
85  CROSS_CORR_ *p_cc, STATUS_ *p_status)
86 {
87 
  /* carried over from the previous call: bounds of the last chosen peak in
     the coefficient track, and the pitch estimate seen before the last one */
88  static int zx_lft_N, zx_rht_N;
89  static double prev_pf = BREAK_NUMBER;
90 
91  int n, j, k, N0 = 0, N1, N2, N_, q, lower_found = 0, score = 1, apply_bias;
92  int x_index, y_index, z_index;
93  int zx_rate = 0, zx_at_N0 = 0, prev_sign;
94  int seg1_zxs = 0, seg2_zxs = 0, total_zxs;
95  short prev_seg1, prev_seg2;
96  short x_max = -MAXSHORT, x_min = MAXSHORT;
97  short y_max = -MAXSHORT, y_min = MAXSHORT;
98  double xx = 0.0, yy = 0.0, zz = 0.0, xy = 0.0, yz = 0.0, xz = 0.0;
99  double max_cc = 0.0, coefficient, coeff_weight;
100  double xx_N, yy_N, xy_N, y1y1_N, xy1_N, yy1_N, beta;
101  LIST_ *sig_pks_hd, *sig_pks_tl, *sig_peak, *head, *tail;
102 
103  sig_pks_hd = head = NULL;
104  sig_pks_tl = tail = NULL;
105  /* set correlation coefficient threshold */
106  if (p_status->v_uv == UNVOICED || p_status->v_uv == SILENT)
107  p_status->threshold = paras->Thigh;
108  else /* p_status->v_uv == VOICED */
109  p_status->threshold = (paras->Tmin > paras->Tmax_ratio *
110  p_status->cc_max) ? paras->Tmin : paras->Tmax_ratio *
111  p_status->cc_max;
112  /* determine if a bias should be applied */
  /* bias only when tracking is on, the previous frame was voiced and sent,
     and its pitch lies between 0.625 and 1.75 times the one before it */
113  if (paras->peak_tracking && prev_pf != BREAK_NUMBER &&
114  p_status->v_uv == VOICED && p_status->s_h != HOLD &&
115  p_status->pitch_freq < 1.75 * prev_pf &&
116  p_status->pitch_freq > 0.625 * prev_pf)
117  apply_bias = 1;
118  else
119  apply_bias = 0;
120  /* consider first two segments of period n = Nmin */
  /* x segment ends at index Nmax, y segment starts at index Nmax */
121  prev_seg1 = seg.data[paras->Nmax - paras->Nmin] < 0 ? -1 : 1;
122  prev_seg2 = seg.data[paras->Nmax] < 0 ? -1 : 1;
123  for (j = 0; j < paras->Nmin; j += paras->L) {
124  /* find max and min amplitudes in x and y segments */
125  x_index = paras->Nmax - paras->Nmin + j;
126  y_index = paras->Nmax + j;
127  if (seg.data[x_index] > x_max) x_max = seg.data[x_index];
128  if (seg.data[x_index] < x_min) x_min = seg.data[x_index];
129  if (seg.data[y_index] > y_max) y_max = seg.data[y_index];
130  if (seg.data[y_index] < y_min) y_min = seg.data[y_index];
131  /* does new sample in x or y segment represent an input zero-crossing */
132  if (seg.data[x_index] * prev_seg1 < 0) {
133  prev_seg1 *= -1;
134  seg1_zxs++;
135  }
136  if (seg.data[y_index] * prev_seg2 < 0) {
137  prev_seg2 *= -1;
138  seg2_zxs++;
139  }
140  /* calculate parts for first correlation coefficient */
141  xx += (double) seg.data[x_index] * seg.data[x_index];
142  yy += (double) seg.data[y_index] * seg.data[y_index];
143  xy += (double) seg.data[x_index] * seg.data[y_index];
144  }
145  /* low amplitude segment represents silence */
  /* returns before any list node is allocated, so nothing needs freeing */
146  if (abs (x_max) + abs (x_min) < 2 * paras->Tsilent ||
147  abs (y_max) + abs (y_min) < 2 * paras->Tsilent) {
148  for (q = 0; q < p_cc->size; p_cc->coeff[q++] = 0.0);
149  prev_pf = p_status->pitch_freq;
150  p_status->pitch_freq = BREAK_NUMBER;
151  p_status->v_uv = SILENT;
152  p_status->s_h = SEND;
153  p_status->cc_max = 0.0;
154  return;
155  }
156  /* determine first correlation coefficients, for period n = Nmin */
157  p_cc->coeff[0] = p_status->cc_max = xy / sqrt (xx) / sqrt (yy);
158  for (q = 1; q < p_cc->size && q < paras->L; p_cc->coeff[q++] = 0.0);
159  total_zxs = seg1_zxs + seg2_zxs;
160  prev_sign = p_cc->coeff[0] < 0.0 ? -1 : 1;
161  prev_seg1 = seg.data[paras->Nmax - paras->Nmin] < 0 ? -1 : 1;
162  /* iteratively determine correlation coefficient for next possible period */
  /* j continues from the loop above, so y_index keeps extending the y
     segment to the right while x_index extends the x segment to the left */
163  for (n = paras->Nmin + paras->L; n <= paras->Nmax; n += paras->L,
164  j += paras->L) {
165  x_index = paras->Nmax - n;
166  y_index = paras->Nmax + j;
167  /* does new samples in x or y segment represent an input zero-crossing */
168  if (seg.data[x_index] * prev_seg1 < 0) {
169  prev_seg1 *= -1;
170  total_zxs++;
171  }
172  if (seg.data[y_index] * prev_seg2 < 0) {
173  prev_seg2 *= -1;
174  total_zxs++;
175  }
176  /* determine next coefficient */
  /* xx and yy are extended incrementally; xy is recomputed from scratch
     because the x/y alignment changes with every n */
177  xx += (double) seg.data[x_index] * seg.data[x_index];
178  yy += (double) seg.data[y_index] * seg.data[y_index];
179  for (k = 0, xy = 0.0; k < n; k += paras->L)
180  xy += (double) seg.data[paras->Nmax - n + k] * seg.data[paras->Nmax + k];
181  p_cc->coeff[n - paras->Nmin] = xy / sqrt (xx) / sqrt (yy);
182  if (p_cc->coeff[n - paras->Nmin] > p_status->cc_max)
183  p_status->cc_max = p_cc->coeff[n - paras->Nmin];
184  /* set unknown coefficients to zero */
185  for (q = n - paras->Nmin + 1;
186  q < p_cc->size && q < n - paras->Nmin + paras->L;
187  p_cc->coeff[q++] = 0.0);
188  /* is there a slope with positive gradient in the coefficients track yet */
189  if (p_cc->coeff[n - paras->Nmin] > p_cc->coeff[n - paras->Nmin - paras->L])
190  lower_found = 1;
191  /* has new coefficient resulted in a zero-crossing */
192  if (p_cc->coeff[n - paras->Nmin] * prev_sign < 0.0) {
193  prev_sign *= -1;
194  zx_rate++;
195  }
196  /* does the new coefficient represent a pitch period candidate */
  /* a pending candidate N0 is committed to the list only once the
     coefficient track has crossed zero again after it was recorded */
197  if (N0 != 0 && zx_rate > zx_at_N0) {
198  add_to_list (&sig_pks_hd, &sig_pks_tl, N0, 1);
199  N0 = 0;
200  max_cc = 0.0;
201  }
  /* coefficients inside the previous call's peak bounds count double */
202  if (apply_bias && n > zx_lft_N && n < zx_rht_N)
203  coeff_weight = 2.0;
204  else
205  coeff_weight = 1.0;
206  if (p_cc->coeff[n - paras->Nmin] > max_cc && total_zxs > 3 && lower_found) {
207  max_cc = p_cc->coeff[n - paras->Nmin];
208  if (max_cc * coeff_weight >= p_status->threshold) {
209  zx_at_N0 = zx_rate;
210  N0 = n;
211  }
212  }
213  }
214  /* unvoiced if no significant peak found in coefficients track */
215  if (sig_pks_hd == NULL) {
216  prev_pf = p_status->pitch_freq;
217  p_status->pitch_freq = BREAK_NUMBER;
218  p_status->v_uv = UNVOICED;
219  p_status->s_h = SEND;
220  return;
221  }
222  /* find which significant peak in list corresponds to true pitch period */
  /* each candidate is re-checked by correlating the y segment with the z
     segment that follows it; passing candidates are marked with score 2 */
223  sig_peak = sig_pks_hd;
224  while (sig_peak != NULL) {
225  yy = zz = yz = 0.0;
226  for (j = 0; j < sig_peak->N0; j++) {
227  y_index = paras->Nmax + j;
228  z_index = paras->Nmax + sig_peak->N0 + j;
229  yy += (double) seg.data[y_index] * seg.data[y_index];
230  zz += (double) seg.data[z_index] * seg.data[z_index];
231  yz += (double) seg.data[y_index] * seg.data[z_index];
232  }
233  if (yy == 0.0 || zz == 0.0)
234  coefficient = 0.0;
235  else
236  coefficient = yz / sqrt (yy) / sqrt (zz);
237  if (apply_bias && sig_peak->N0 > zx_lft_N && sig_peak->N0 < zx_rht_N)
238  coeff_weight = 2.0;
239  else
240  coeff_weight = 1.0;
241  if (coefficient * coeff_weight >= p_status->threshold) {
242  sig_peak->score = 2;
243  if (head == NULL) {
244  head = sig_peak;
245  score = 2;
246  }
247  tail = sig_peak;
248  }
249  sig_peak = sig_peak->next_item;
250  }
  /* no candidate scored 2: fall back to the whole list (score stays 1) */
251  if (head == NULL) head = sig_pks_hd;
252  if (tail == NULL) tail = sig_pks_tl;
253  N0 = head->N0;
  /* several candidates remain: compare each against the x segment of the
     longest one; a later candidate replaces the current choice only when
     its coefficient scaled by Tdh exceeds the best so far */
254  if (tail != head) {
255  xx = 0.0;
256  for (j = 0; j < tail->N0; j++)
257  xx += (double) seg.data[paras->Nmax - tail->N0 + j] *
258  seg.data[paras->Nmax - tail->N0 + j];
259  sig_peak = head;
260  while (sig_peak != NULL) {
261  if (sig_peak->score == score) {
262  xz = zz = 0.0;
263  for (j = 0; j < tail->N0; j++) {
264  z_index = paras->Nmax + sig_peak->N0 + j;
265  xz += (double) seg.data[paras->Nmax - tail->N0 + j] *
266  seg.data[z_index];
267  zz += (double) seg.data[z_index] * seg.data[z_index];
268  }
  /* NOTE(review): unlike the check above, zz == 0.0 is not guarded here */
269  coefficient = xz / sqrt (xx) / sqrt (zz);
270  if (sig_peak == head)
271  max_cc = coefficient;
272  else if (coefficient * paras->Tdh > max_cc) {
273  N0 = sig_peak->N0;
274  max_cc = coefficient;
275  }
276  }
277  sig_peak = sig_peak->next_item;
278  }
279  }
280  p_status->cc_max = p_cc->coeff[N0 - paras->Nmin];
281  /* voiced segment period now found */
282  if ((tail == head && score == 1 && p_status->v_uv != VOICED) ||
283  p_cc->coeff[N0 - paras->Nmin] < p_status->threshold)
284  p_status->s_h = HOLD;
285  else
286  p_status->s_h = SEND;
287  /* find left and right boundaries of peak in coefficients track */
  /* stored in the statics so the next call can apply the tracking bias;
     a bound stays 0 when no negative coefficient is found on that side */
288  zx_lft_N = zx_rht_N = 0;
289  for (q = N0; q >= paras->Nmin; q -= paras->L)
290  if (p_cc->coeff[q - paras->Nmin] < 0.0) {
291  zx_lft_N = q;
292  break;
293  }
294  for (q = N0; q <= paras->Nmax; q += paras->L)
295  if (p_cc->coeff[q - paras->Nmin] < 0.0) {
296  zx_rht_N = q;
297  break;
298  }
299  /* define small region around peak */
  /* NOTE(review): N2 = N0 + 2 * L is not clamped to Nmax in the first
     branch -- confirm Nmax - Nmin >= 2 * L always holds */
300  if (N0 - paras->L < paras->Nmin) {
301  N1 = N0;
302  N2 = N0 + 2 * paras->L;
303  }
304  else if (N0 + paras->L > paras->Nmax) {
305  N1 = N0 - 2 * paras->L;
306  N2 = N0;
307  }
308  else {
309  N1 = N0 - paras->L;
310  N2 = N0 + paras->L;
311  }
312  /* compensate for decimation factor L */
  /* recompute the coefficients for every n in [N1, N2] using all samples
     (step 1) and keep the n with the largest coefficient */
313  if (paras->L != 1) {
314  xx = yy = xy = 0.0;
315  for (j = 0; j < N1; j++) {
316  x_index = paras->Nmax - N1 + j;
317  y_index = paras->Nmax + j;
318  xx += (double) seg.data[x_index] * seg.data[x_index];
319  xy += (double) seg.data[x_index] * seg.data[y_index];
320  yy += (double) seg.data[y_index] * seg.data[y_index];
321  }
322  p_cc->coeff[N1 - paras->Nmin] = p_status->cc_max =
323  xy / sqrt (xx) / sqrt (yy);
324  N0 = N1;
325  for (n = N1 + 1; n <= N2; n++, j++) {
326  xx += (double) seg.data[paras->Nmax - n] * seg.data[paras->Nmax - n];
327  yy += (double) seg.data[paras->Nmax + j] * seg.data[paras->Nmax + j];
328  for (k = 0, xy = 0.0; k < n; k++)
329  xy += (double) seg.data[paras->Nmax - n + k] * seg.data[paras->Nmax + k];
330  p_cc->coeff[n - paras->Nmin] = xy / sqrt (xx) / sqrt (yy);
331  if (p_cc->coeff[n - paras->Nmin] > p_status->cc_max) {
332  p_status->cc_max = p_cc->coeff[n - paras->Nmin];
333  N0 = n;
334  }
335  }
336  }
337  /* compensate for finite resolution in estimating pitch */
  /* choose the integer period N_ (N0 or N0 - 1) from which the fractional
     offset beta is measured, based on which neighbouring coefficient is
     closer to the peak value */
338  if (N0 - 1 < paras->Nmin || N0 == N1) N_ = N0;
339  else if (N0 + 1 > paras->Nmax || N0 == N2) N_ = N0 - 1;
340  else if (p_cc->coeff[N0 - paras->Nmin] - p_cc->coeff[N0 - paras->Nmin - 1] <
341  p_cc->coeff[N0 - paras->Nmin] - p_cc->coeff[N0 - paras->Nmin + 1])
342  N_ = N0 - 1;
343  else
344  N_ = N0;
345  xx_N = yy_N = xy_N = y1y1_N = xy1_N = yy1_N = 0.0;
346  for (j = 0; j < N_; j++) {
347  x_index = paras->Nmax - N_ + j;
348  y_index = paras->Nmax + j;
349  xx_N += (double) seg.data[x_index] * seg.data[x_index];
350  yy_N += (double) seg.data[y_index] * seg.data[y_index];
351  xy_N += (double) seg.data[x_index] * seg.data[y_index];
352  y1y1_N += (double) seg.data[y_index + 1] * seg.data[y_index + 1];
353  xy1_N += (double) seg.data[x_index] * seg.data[y_index + 1];
354  yy1_N += (double) seg.data[y_index] * seg.data[y_index + 1];
355  }
  /* beta is the fractional part of the period, built from sums over the y
     segment and the same segment shifted by one sample.
     NOTE(review): the denominator is not checked for zero */
356  beta = (xy1_N * yy_N - xy_N * yy1_N) /
357  (xy1_N * (yy_N - yy1_N) + xy_N * (y1y1_N - yy1_N));
358  if (beta < 0.0) {
359  N_--;
360  beta = 0.0;
361  }
362  else if (beta >= 1.0) {
363  N_++;
364  beta = 0.0;
365  }
366  else
367  p_status->cc_max = ((1.0 - beta) * xy_N + beta * xy1_N) /
368  sqrt (xx_N * ((1.0 - beta) * (1.0 - beta) * yy_N +
369  2.0 * beta * (1.0 - beta) * yy1_N +
370  beta * beta * y1y1_N));
371  prev_pf = p_status->pitch_freq;
372  p_status->pitch_freq = (double) (paras->sample_freq) / (double) (N_ + beta);
373  p_status->v_uv = VOICED;
  /* the candidate list is non-empty only on this path; both earlier
     returns happen while the list is still empty */
374  free_list (&sig_pks_hd);
375  return;
376 
377 }
378 
379 /************* * LEVEL TWO * ************/
380 
381 void add_to_list (LIST_ **p_list_hd, LIST_ **p_list_tl, int N_val,
382  int score_val)
383 {
384 
385  LIST_ *new_node, *last_node;
386 
387  new_node = walloc(LIST_ ,1);
388  last_node = *p_list_tl;
389  new_node->N0 = N_val;
390  new_node->score = score_val;
391  new_node->next_item = NULL;
392  if (*p_list_hd == NULL)
393  *p_list_hd = new_node;
394  else
395  last_node->next_item = new_node;
396  *p_list_tl = new_node;
397 
398 }
399 
400 /********************
401  * define functions *
402  ********************/
403 
404 /************* * LEVEL ONE * ************/
405 
406 void error (error_flags err_type)
407 {
408 
409  char prog[15]; /* program file name */
410 
411  strcpy (prog, "srpd");
412  fprintf (stderr, "%s: ", prog);
413  switch (err_type) {
414  case CANT_WRITE:
415  fprintf (stderr, "cannot write to output file");
416  break;
417  case DECI_FCTR:
418  fprintf (stderr, "decimation factor not set");
419  break;
420  case INSUF_MEM:
421  fprintf (stderr, "insufficient memory available");
422  break;
423  case FILE_ERR:
424  perror ("");
425  break;
426  case FILE_SEEK:
427  fprintf (stderr, "improper fseek () to reposition a stream");
428  break;
429  case LEN_OOR:
430  fprintf (stderr, "artificial frame length set out of range");
431  break;
432  case MAX_FREQ:
433  fprintf (stderr, "maximum pitch frequency value (Hz) not set");
434  break;
435  case MIN_FREQ:
436  fprintf (stderr, "minimum pitch frequency value (Hz) not set");
437  break;
438  case MISUSE:
439  fprintf (stderr, "usage: %s -i lpf_sample_file ", prog);
440  fprintf (stderr, "-o pitch_file [options]\n");
441  fprintf (stderr, "\nOptions {with default values}\n");
442  fprintf (stderr, "-a form pitch_file in ascii format\n");
443  fprintf (stderr, "-l 'lower pitch frequency limit' {%f (Hz)}\n",
445  fprintf (stderr, "-u 'upper pitch frequency limit' {%f (Hz)}\n",
447  fprintf (stderr, "-d 'decimation factor' {%d (samples)}\n",
449  fprintf (stderr, "-n 'noise floor (abs. amplitude)' {%d}\n",
451  fprintf (stderr, "-h 'unvoiced to voiced coeff threshold' {%f}\n",
452  DEFAULT_THIGH);
453  fprintf (stderr, "-m 'min. voiced to unvoiced coeff threshold' {%f}\n",
454  DEFAULT_TMIN);
455  fprintf (stderr, "-r 'voiced to unvoiced coeff threshold ratio' {%f}\n",
457  fprintf (stderr, "-t 'anti pitch doubling/halving threshold' {%f}\n",
458  DEFAULT_TDH);
459  fprintf (stderr, "-p perform peak tracking\n");
460  fprintf (stderr, "-f 'sampling frequency' {%d (Hz)}\n", DEFAULT_SF);
461  fprintf (stderr, "-s 'frame shift' {%f (ms)}\n", DEFAULT_SHIFT);
462  fprintf (stderr, "-w 'artificial frame length' {%f (ms)}\n",
464  break;
465  case NOISE_FLOOR:
466  fprintf (stderr, "noise floor set below minimum amplitude");
467  break;
468  case SAMPLE_FREQ:
469  fprintf (stderr, "attempt to set sampling frequency negative");
470  break;
471  case SFT_OOR:
472  fprintf (stderr, "frame shift set out of range");
473  break;
474  case THR_DH:
475  fprintf (stderr, "anti pitch doubling/halving threshold not set");
476  break;
477  case THR_HIGH:
478  fprintf (stderr, "unvoiced to voiced coeff threshold not set");
479  break;
480  case THR_MAX_RTO:
481  fprintf (stderr, "voiced to unvoiced coeff threshold ratio not set");
482  break;
483  case THR_MIN:
484  fprintf (stderr, "minimum voiced to unvoiced coeff threshold not set");
485  break;
486  default:
487  fprintf (stderr, "undefined error, %u occurred", err_type);
488  break;
489  }
490  fprintf (stderr, "\n");
491  exit (-1);
492 
493 }
494 
495 void initialise_parameters (struct Srpd_Op *p_par)
496 {
497  p_par->L = DEFAULT_DECIMATION;
498  p_par->min_pitch = DEFAULT_MIN_PITCH;
499  p_par->max_pitch = DEFAULT_MAX_PITCH;
500  p_par->shift = DEFAULT_SHIFT;
501  p_par->length = DEFAULT_LENGTH;
502  p_par->Tsilent = DEFAULT_TSILENT;
503  p_par->Tmin = DEFAULT_TMIN;
505  p_par->Thigh = DEFAULT_THIGH;
506  p_par->Tdh = DEFAULT_TDH;
507  p_par->make_ascii = 0;
508  p_par->peak_tracking = 0;
509  p_par->sample_freq = DEFAULT_SF;
510  /* p_par->Nmax and p_par->Nmin cannot be initialised */
511  return;
512 
513 }
514 
515 void initialise_structures (struct Srpd_Op *p_par, SEGMENT_ *p_seg,
516  CROSS_CORR_ *p_cc)
517 {
518  p_par->Nmax = (int) ceil((float)p_par->sample_freq / p_par->min_pitch);
519  p_par->Nmin = (int) floor((float)p_par->sample_freq / p_par->max_pitch);
520  p_par->min_pitch = (float)p_par->sample_freq / (float)p_par->Nmax;
521  p_par->max_pitch = (float)p_par->sample_freq / (float)p_par->Nmin;
522 
523  p_seg->size = 3 * p_par->Nmax;
524  p_seg->shift = (int) rint( p_par->shift / 1000.0 * (float)p_par->sample_freq );
525  p_seg->length = (int) rint( p_par->length / 1000.0 * (float)p_par->sample_freq );
526  p_seg->data = walloc(short,p_seg->size);
527 
528  p_cc->size = p_par->Nmax - p_par->Nmin + 1;
529  p_cc->coeff = walloc(double,p_cc->size);
530 
531  return;
532 }
533 
534 
535 void initialise_status (struct Srpd_Op *paras, STATUS_ *p_status)
536 {
537 
538  p_status->pitch_freq = BREAK_NUMBER;
539  p_status->v_uv = SILENT;
540  p_status->s_h = SEND; /* SENT */
541  p_status->cc_max = 0.0;
542  p_status->threshold = paras->Thigh;
543  return;
544 
545 }
546 
548 {
549 
550  wfree (p_seg->data);
551  wfree (p_cc->coeff);
552  return;
553 
554 }
555 
556 #define BEGINNING 1
557 #define MIDDLE_ 2
558 #define END 3
559 
560 int read_next_segment (FILE *voxfile, struct Srpd_Op *paras, SEGMENT_ *p_seg)
561 {
562 
563  static int status = BEGINNING, padding= -1, tracklen = 0;
564 
565  int samples_read = 0;
566  long init_file_position, offset;
567 
568  if (status == BEGINNING) {
569  if (padding == -1) {
570  if (EST_fseek (voxfile, 0L, 2)) error (FILE_SEEK);
571  tracklen = ((EST_ftell (voxfile) / sizeof (short)) - p_seg->length) /
572  p_seg->shift + 1;
573  cout << "track len " << tracklen;
574  rewind (voxfile);
575  if (paras->Nmax < p_seg->length / 2) {
576  offset = (long) (p_seg->length / 2 - paras->Nmax) * sizeof (short);
577  if (EST_fseek (voxfile, offset, 1)) error (FILE_SEEK);
578  padding = 0;
579  }
580  else {
581  if ((paras->Nmax - p_seg->length / 2) % p_seg->shift != 0) {
582  offset = (long) (p_seg->shift - ((paras->Nmax - p_seg->length / 2) %
583  p_seg->shift)) * sizeof (short);
584  if (EST_fseek (voxfile, offset, 1)) error (FILE_SEEK);
585  }
586  padding = (paras->Nmax - p_seg->length / 2) / p_seg->shift +
587  ((paras->Nmax - p_seg->length / 2) % p_seg->shift == 0 ? 0 : 1);
588  }
589  }
590  cout << "padding " << padding << endl;
591  if (padding-- == 0)
592  status = MIDDLE_;
593  else if (tracklen-- <= 0)
594  return (0);
595  else
596  return (2);
597  }
598  cout << "tl " << tracklen << endl;
599  if (status == MIDDLE_) {
600  if (tracklen > 0) {
601  init_file_position = EST_ftell (voxfile);
602  offset = (long) (p_seg->shift * sizeof (short));
603  samples_read = fread ((short *) p_seg->data, sizeof (short),
604  p_seg->size, voxfile);
605  if (samples_read == p_seg->size) {
606  if (EST_fseek (voxfile, init_file_position + offset, 0)) error (FILE_SEEK);
607  tracklen--;
608  return (1);
609  }
610  else {
611  status = END;
612  }
613  }
614  else
615  return (0);
616  }
617  if (status == END) {
618  if (tracklen-- > 0)
619  return (2);
620  else
621  return (0);
622  }
623  return (0);
624 
625 }
626 
628 {
629  static int status = BEGINNING, padding = -1, tracklen = 0;
630  int i;
631  long offset;
632  static int wave_pos;
633 
634 
635  //printf("read: size %d shift %d length %d\n", p_seg->size, p_seg->shift, p_seg->length);
636 
637  if (status == BEGINNING)
638  {
639  if (padding == -1)
640  {
641  tracklen = (sig.num_samples() - p_seg->length)
642  / p_seg->shift + 1;
643  if (paras->Nmax < p_seg->length / 2)
644  {
645  offset = p_seg->length / 2 - paras->Nmax;
646  wave_pos = offset;
647  padding = 0;
648  }
649  else
650  {
651  if ((paras->Nmax - p_seg->length / 2) % p_seg->shift != 0) {
652  offset = p_seg->shift - ((paras->Nmax - p_seg->length / 2)%
653  p_seg->shift);
654  wave_pos = offset;
655  }
656  padding = (paras->Nmax - p_seg->length / 2) / p_seg->shift +
657  ((paras->Nmax - p_seg->length / 2)
658  % p_seg->shift == 0 ? 0 : 1);
659  }
660  }
661  if (padding-- == 0)
662  status = MIDDLE_;
663  else if (tracklen-- <= 0) {
664  status = BEGINNING;
665  padding = -1;
666  tracklen = 0;
667  return (0);
668  }
669  else
670  return (2);
671  }
672  if (status == MIDDLE_)
673  {
674  if (tracklen > 0)
675  {
676  offset = p_seg->shift;
677  for (i = 0; (i < p_seg->size) && (i+wave_pos)<sig.num_samples();
678  ++i)
679  p_seg->data[i] = sig.a(i + wave_pos,0);
680  for ( ; i < p_seg->size; ++i)
681  p_seg->data[i] = 0;
682 
683  if (wave_pos <= sig.num_samples())
684  {
685  wave_pos += offset;
686  tracklen--;
687  return (1);
688  }
689  else
690  status = END;
691  }
692  else {
693  status = BEGINNING;
694  padding = -1;
695  tracklen = 0;
696  return (0);
697  }
698  }
699  if (status == END)
700  {
701  if (tracklen-- > 0)
702  return (2);
703  else {
704  status = BEGINNING;
705  padding = -1;
706  tracklen = 0;
707  return (0);
708  }
709  }
710  status = BEGINNING;
711  padding = -1;
712  tracklen = 0;
713  return (0);
714 }
715 
716 void write_track(STATUS_ status, struct Srpd_Op paras, FILE *outfile)
717 {
718  if (paras.make_ascii)
719  {
720  if (fprintf(outfile,"%7g\n",status.pitch_freq) != 8)
721  error(CANT_WRITE);
722  }
723  else
724  if (!fwrite ((double *) &status.pitch_freq, sizeof (double), 1, outfile))
725  error (CANT_WRITE);
726  return;
727 
728 }
729 
730 void free_list (LIST_ **p_list_hd)
731 {
732 
733  LIST_ *next;
734 
735  while (*p_list_hd != NULL) {
736  next = (*p_list_hd)->next_item;
737  wfree (*p_list_hd);
738  *p_list_hd = next;
739  }
740 
741 }
A class for storing digital waveforms. The waveform is stored as an array of 16 bit shorts...
Definition: EST_Wave.h:64
Definition: srpd.h:112
#define VOICED
Definition: srpd.h:62
void error(error_flags err_type)
Definition: srpd1.3.cc:406
#define walloc(TYPE, SIZE)
Definition: EST_walloc.h:52
int N0
Definition: srpd.h:106
Definition: srpd.h:80
EST_FilePos EST_ftell(FILE *fp)
Definition: EST_File.h:71
Definition: srpd.h:86
double length
Definition: srpd.h:89
void add_to_list(LIST_ **p_list_hd, LIST_ **p_list_tl, int N_val, int score_val)
Definition: srpd1.3.cc:381
Definition: srpd.h:111
double threshold
Definition: srpd.h:102
#define SILENT
Definition: srpd.h:63
char v_uv
Definition: srpd.h:101
error_flags
Definition: srpd.h:110
int L
Definition: srpd.h:92
Definition: srpd.h:99
int shift
Definition: srpd.h:81
#define UNVOICED
Definition: srpd.h:61
Definition: srpd.h:111
double Tmax_ratio
Definition: srpd.h:93
int peak_tracking
Definition: srpd.h:96
int Nmin
Definition: srpd.h:88
char s_h
Definition: srpd.h:101
#define DEFAULT_TDH
Definition: srpd.h:59
ssize_t num_samples() const
return the number of samples in the waveform
Definition: EST_Wave.h:143
void free_list(LIST_ **p_list_hd)
Definition: srpd1.3.cc:730
struct list * next_item
Definition: srpd.h:107
#define HOLD
Definition: srpd.h:65
short & a(ssize_t i, ssize_t channel=0)
Definition: EST_Wave.cc:128
double cc_max
Definition: srpd.h:102
int sample_freq
Definition: srpd.h:87
#define MAXSHORT
Definition: srpd1.3.cc:81
Definition: srpd.h:111
#define DEFAULT_MAX_PITCH
Definition: srpd.h:50
#define MIDDLE_
Definition: srpd1.3.cc:557
#define DEFAULT_TSILENT
Definition: srpd.h:55
void initialise_structures(struct Srpd_Op *p_par, SEGMENT_ *p_seg, CROSS_CORR_ *p_cc)
Definition: srpd1.3.cc:515
double shift
Definition: srpd.h:89
double Tdh
Definition: srpd.h:93
int make_ascii
Definition: srpd.h:95
void initialise_status(struct Srpd_Op *paras, STATUS_ *p_status)
Definition: srpd1.3.cc:535
double * coeff
Definition: srpd.h:76
double min_pitch
Definition: srpd.h:90
#define DEFAULT_TMIN
Definition: srpd.h:56
void initialise_parameters(struct Srpd_Op *p_par)
Definition: srpd1.3.cc:495
#define DEFAULT_THIGH
Definition: srpd.h:58
#define DEFAULT_MIN_PITCH
Definition: srpd.h:49
int read_next_wave_segment(EST_Wave &sig, Srpd_Op *paras, SEGMENT_ *p_seg)
Definition: srpd1.3.cc:627
Definition: srpd.h:105
int size
Definition: srpd.h:81
void super_resolution_pda(struct Srpd_Op *paras, SEGMENT_ seg, CROSS_CORR_ *p_cc, STATUS_ *p_status)
Definition: srpd1.3.cc:84
EST_String outfile
NULL
Definition: EST_WFST.cc:55
#define END
Definition: srpd1.3.cc:558
#define DEFAULT_LENGTH
Definition: srpd.h:54
int score
Definition: srpd.h:106
int EST_fseek(FILE *fp, EST_FilePos offset, int whence)
Definition: EST_File.h:75
#define BEGINNING
Definition: srpd1.3.cc:556
Definition: srpd.h:112
Definition: srpd.h:112
short * data
Definition: srpd.h:82
#define DEFAULT_TMAX_RATIO
Definition: srpd.h:57
getString int
Definition: EST_item_aux.cc:50
void end_structure_use(SEGMENT_ *p_seg, CROSS_CORR_ *p_cc)
Definition: srpd1.3.cc:547
double pitch_freq
Definition: srpd.h:100
#define SEND
Definition: srpd.h:67
#define DEFAULT_SHIFT
Definition: srpd.h:53
int Nmax
Definition: srpd.h:88
int Tsilent
Definition: srpd.h:94
int length
Definition: srpd.h:81
#define DEFAULT_SF
Definition: srpd.h:52
#define DEFAULT_DECIMATION
Definition: srpd.h:48
double Tmin
Definition: srpd.h:93
double Thigh
Definition: srpd.h:93
int size
Definition: srpd.h:75
double max_pitch
Definition: srpd.h:91
#define BREAK_NUMBER
Definition: srpd.h:46
void wfree(void *p)
Definition: walloc.c:131
void write_track(STATUS_ status, struct Srpd_Op paras, FILE *outfile)
Definition: srpd1.3.cc:716
Definition: srpd.h:112
Definition: srpd.h:112
Definition: srpd.h:113
int read_next_segment(FILE *voxfile, struct Srpd_Op *paras, SEGMENT_ *p_seg)
Definition: srpd1.3.cc:560