Edinburgh Speech Tools  2.1-release
rateconv.cc
Go to the documentation of this file.
1 /*
2  * $Id: rateconv.cc,v 1.4 2004/09/29 08:24:16 robert Exp $
3  *
4  * RATECONV.C
5  *
6  * Convert sampling rate stdin to stdout
7  *
8  * Copyright (c) 1992, 1995 by Markus Mummert
9  *
10  *****************************************************************************
11  * MODIFIED BY Alan W Black (awb@cstr.ed.ac.uk)
12  * Make it compilable under C++
13  * and integrate into Edinburgh Speech Tools (i.e. no longer
14  * reads from stdin / writes to stdout)
15  * Removed interface functions
16  * ansified function calls
17  * made it work in floats rather than ints
18  * I got the original from a random linux site, the original
19  * author's email is <mum@mmk.e-technik.tu-muenchen.de>
20  *****************************************************************************
21  *
22  * Redistribution and use of this software, modification and inclusion
23  * into other forms of software are permitted provided that the following
24  * conditions are met:
25  *
26  * 1. Redistributions of this software must retain the above copyright
27  * notice, this list of conditions and the following disclaimer.
28  * 2. If this software is redistributed in a modified condition
29  * it must reveal clearly that it has been modified.
30  *
31  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
32  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
33  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
34  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
35  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
36  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
37  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
38  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
39  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
41  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
42  * DAMAGE.
43  *
44  *
45  * history: 2.9.92 begin coding
46  * 5.9.92 fully operational
47  * 14.2.95 provide BIG_ENDIAN, SWAPPED_BYTES_DEFAULT
48  * switches, Copyright note and References
49  * 25.11.95 changed XXX_ENDIAN to I_AM_XXX_ENDIAN
50  * default gain set to 0.8
51  * 3.12.95 stereo implementation
52  * SWAPPED_BYTES_DEFAULT now HBYTE1ST_DEFAULT
53  * changed [L/2] to (L-1)/2 for exact symmetry
54  *
55  *
56  * IMPLEMENTATION NOTES
57  *
58  * Converting is achieved by interpolating the input samples in
59  * order to evaluate the represented continuous input slope at
60  * sample instances of the new rate (resampling). It is implemented
61  * as a polyphase FIR-filtering process (see reference). The rate
62  * conversion factor is determined by a rational factor. Its
63  * nominator and denominator are integers of almost arbitrary
64  * value, limited only by coefficient memory size.
65  *
66  * General rate conversion formula:
67  *
68  * out(n*Tout) = SUM in(m*Tin) * g((n*d/u-m)*Tin) * Tin
69  * over all m
70  *
71  * FIR-based rate conversion formula for polyphase processing:
72  *
73  * L-1
74  * out(n*Tout) = SUM in(A(i,n)*Tin) * g(B(i,n)*Tin) * Tin
75  * i=0
76  *
77  * A(i,n) = i - (L-1)/2 + [n*d/u]
78  * = i - (L-1)/2 + [(n%u)*d/u] + [n/u]*d
79  * B(i,n) = n*d/u - [n*d/u] + (L-1)/2 - i
80  * = ((n%u)*d/u)%1 + (L-1)/2 - i
81  * Tout = Tin * d/u
82  *
83  * where:
84  * n,i running integers
85  * out(t) output signal sampled at t=n*Tout
86  * in(t) input signal sampled in intervals Tin
87  * u,d up- and downsampling factor, integers
88  * g(t) interpolating function
89  * L FIR-length of realized g(t), integer
90  * / float-division-operator
91  * % float-modulo-operator
92  * [] integer-operator
93  *
94  * note:
95  * (L-1)/2 in A(i,n) can be omitted as pure time shift yielding
96  * a causal design with a delay of ((L-1)/2)*Tin.
97  * n%u is a cyclic modulo-u counter clocked by out-rate
98  * [n/u]*d is a d-increment counter, advanced when n%u resets
99  * B(i,n)*Tin can take on L*u different values, at which g(t)
100  * has to be sampled as a coefficient array
101  *
102  * Interpolation function design:
103  *
104  * The interpolation function design is based on a sinc-function
105  * windowed by a gaussian function. The former determines the
106  * cutoff frequency, the latter limits the necessary FIR-length by
107  * pushing the outer skirts of the resulting impulse response below
108  * a certain threshold fast enough. The drawback is a smoothed
109  * cutoff inducing some aliasing. Due to the symmetry of g(t) the
110  * group delay of the filtering process is constant (linear phase).
111  *
112  * g(t) = 2*fgK*sinc(pi*2*fgK*t) * exp(-pi*(2*fgG*t)**2)
113  *
114  * where:
115  * fgK cutoff frequency of sinc function in f-domain
116  * fgG key frequency of gaussian window in f-domain
117  * reflecting the 6.82dB-down point
118  *
119  * note:
120  * Taking fsin=1/Tin as the input sampling frequency, it turns out
121  * that in conjunction with L, u and d only the ratios fgK/(fsin/2)
122  * and fgG/(fsin/2) specify the whole process. Requiring fsin, fgK
123  * and fgG as input purposely keeps the notion of absolute
124  * frequencies.
125  *
126  * Numerical design:
127  *
128  * Samples are expected to be 16bit-signed integers, alternating
129  * left and right channel in case of stereo mode- The byte order
130  * per sample is selectable. FIR-filtering is implemented using
131  * 32bit-signed integer arithmetic. Coefficients are scaled to
132  * find the output sample in the high word of accumulated FIR-sum.
133  *
134  * Interpolation can lead to sample magnitudes exceeding the
135  * input maximum. Worst case is a full scale step function on the
136  * input. In this case the sinc-function exhibits an overshoot of
137  * 2*9=18percent (Gibb's phenomenon). In any case sample overflow
138  * can be avoided by a gain of 0.8.
139  *
140  * If u=d=1 and if the input stream contains only a single sample,
141  * the whole length of the FIR-filter will be written to the output.
142  * In general the resulting output signal will be (L-1)*fsout/fsin
143  * samples longer than the input signal. The effect is that a
144  * finite input sequence is viewed as padded with zeros before the
145  * `beginning' and after the `end'.
146  *
147  * The output lags ((L-1)/2)*Tin behind to implement g(t) as a
148  * causal system corresponding to a causal relationship of the
149  * discrete-time sequences in(m/fsin) and out(n/fsout) with
150  * respect to a sequence time origin at t=n*Tin=m*Tout=0.
151  *
152  *
153  * REFERENCES
154  *
155  * Crochiere, R. E., Rabiner, L. R.: "Multirate Digital Signal
156  * Processing", Prentice-Hall, Englewood Cliffs, New Jersey, 1983
157  *
158  * Zwicker, E., Fastl, H.: "Psychoacoustics - Facts and Models",
159  * Springer-Verlag, Berlin, Heidelberg, New-York, Tokyo, 1990
160  */
161 
162 #include <cmath>
163 #include <cstdio>
164 #include <fcntl.h>
165 #include <cstring>
166 #include "rateconv.h"
167 
168 /*
169  * adaptable defines and globals
170  */
171 #define BYTE char /* signed or unsigned */
172 #define WORD short /* signed or unsigned, fit two BYTEs */
173 #define LONG int /* signed, fit two WORDs */
174 
175 #ifndef MAXUP
176 #define MAXUP 0x400 /* MAXUP*MAXLENGTH worst case malloc */
177 #endif
178 
179 #ifndef MAXLENGTH
180 #define MAXLENGTH 0x400 /* max FIR length */
181 #endif
182  /* accounts for mono samples, means */
183 #define OUTBUFFSIZE (2*MAXLENGTH) /* fit >=MAXLENGHT stereo samples */
184 #define INBUFFSIZE (4*MAXLENGTH) /* fit >=2*MAXLENGTH stereo samples */
185 #define sqr(a) ((a)*(a))
186 
187 #ifndef M_PI
188 #define M_PI 3.14159265358979
189 #endif
190 
191 /* AWB deleted previous byte swap globals, byteswap is done external to */
192 /* this function */
193 
194 #ifdef STEREO_DEFAULT
195 static int g_monoflag = 0;
196 #else
197 static int g_monoflag = -1;
198 #endif
199 
200 /*
201  * other globals
202  */
203 static double g_ampli = 0.8; /* default gain, don't change */
204 static int
205 /* g_infilehandle = 0, */ /* stdin */
206 /* g_outfilehandle = 1, */ /* stdout */
207  g_firlen, /* FIR-length */
208  g_up, /* upsampling factor */
209  g_down /* downsampling factor */
210 ;
211 
212 static float
213  g_sin[INBUFFSIZE], /* input buffer */
214  g_sout[OUTBUFFSIZE], /* output buffer */
215  *g_coep; /* coefficient array pointer */
216 
217 static double
218  g_fsi, /* input sampling frequency */
219  g_fgk, /* sinc-filter cutoff frequency */
220  g_fgg /* gaussian window key frequency */
221 ; /* (6.8dB down freq. in f-domain) */
222 
223 
224 /*
225  * evaluate sinc(x) = sin(x)/x safely
226  */
227 static double sinc(double x)
228 {
229  return(fabs(x) < 1E-50 ? 1.0 : sin(fmod(x,2*M_PI))/x);
230 }
231 
232 /*
233  * evaluate interpolation function g(t) at t
234  * integral of g(t) over all times is expected to be one
235  */
236 static double interpol_func(double t,double fgk,double fgg)
237 {
238  return (2*fgk*sinc(M_PI*2*fgk*t)*exp(-M_PI*sqr(2*fgg*t)));
239 }
240 
241 /*
242  * evaluate coefficient from i, q=n%u by sampling interpolation function
243  * and scale it for integer multiplication used by FIR-filtering
244  */
245 static float coefficient(int i,int q,int firlen,double fgk,double fgg,
246  double fsi,int up,int down,double amp)
247 {
248  float val;
249  double d;
250 
251  d = interpol_func((fmod(q*down/(double)up,1.) + (firlen-1)/2. - i) / fsi,
252  fgk,
253  fgg);
254  val = amp * d/fsi;
255  return val;
256 }
257 
258 /*
259  * transfer n floats from s to d
260  */
261 static void transfer_int(float *s,float *d,int n)
262 {
263  memmove(d,s,sizeof(float)*n);
264 }
265 
266 /*
267  * zerofill n floats from s
268  */
269 static void zerofill(float *s,int n)
270 {
271  memset(s,0,n*(sizeof(float)));
272 }
273 
274 /*
275  * FIR-routines, mono and stereo
276  * this is where we need all the MIPS
277  */
278 void fir_mono(float *inp,float *coep,int firlen,float *outp)
279 {
280  float akku = 0, *endp;
281  int n1 = (firlen / 8) * 8, n0 = firlen % 8;
282 
283  endp = coep + n1;
284  while (coep != endp) {
285  akku += *inp++ * *coep++;
286  akku += *inp++ * *coep++;
287  akku += *inp++ * *coep++;
288  akku += *inp++ * *coep++;
289  akku += *inp++ * *coep++;
290  akku += *inp++ * *coep++;
291  akku += *inp++ * *coep++;
292  akku += *inp++ * *coep++;
293  }
294 
295  endp = coep + n0;
296  while (coep != endp) {
297  akku += *inp++ * *coep++;
298  }
299 
300  *outp = akku;
301 }
302 
303 static void fir_stereo(float *inp,float *coep,int firlen,float *out1p,float *out2p)
304 {
305  float akku1 = 0, akku2 = 0, *endp;
306  int n1 = (firlen / 8) * 8, n0 = firlen % 8;
307 
308  endp = coep + n1;
309  while (coep != endp) {
310  akku1 += *inp++ * *coep;
311  akku2 += *inp++ * *coep++;
312  akku1 += *inp++ * *coep;
313  akku2 += *inp++ * *coep++;
314  akku1 += *inp++ * *coep;
315  akku2 += *inp++ * *coep++;
316  akku1 += *inp++ * *coep;
317  akku2 += *inp++ * *coep++;
318  akku1 += *inp++ * *coep;
319  akku2 += *inp++ * *coep++;
320  akku1 += *inp++ * *coep;
321  akku2 += *inp++ * *coep++;
322  akku1 += *inp++ * *coep;
323  akku2 += *inp++ * *coep++;
324  akku1 += *inp++ * *coep;
325  akku2 += *inp++ * *coep++;
326  }
327 
328  endp = coep + n0;
329  while (coep != endp) {
330  akku1 += *inp++ * *coep;
331  akku2 += *inp++ * *coep++;
332  }
333  *out1p = akku1;
334  *out2p = akku2;
335 }
336 
337 /*
338  * filtering from input buffer to output buffer;
339  * returns number of processed samples in output buffer:
340  * if it is not equal to output buffer size,
341  * the input buffer is expected to be refilled upon entry, so that
342  * the last firlen numbers of the old input buffer are
343  * the first firlen numbers of the new input buffer;
344  * if it is equal to output buffer size, the output buffer
345  * is full and is expected to be stowed away;
346  *
347  */
348 static int inbaseidx = 0, inoffset = 0, cycctr = 0, outidx = 0;
349 
350 static int filtering_on_buffers
351  (float *inp,int insize,float *outp, int outsize,
352  float *coep,int firlen,int up,int down,int monoflag)
353 {
354 
355  if (monoflag) {
356  while (-1) {
357  inoffset = (cycctr * down)/up;
358  if ((inbaseidx + inoffset + firlen) > insize) {
359  inbaseidx -= insize - firlen + 1;
360  return(outidx);
361  }
362  fir_mono(inp + inoffset + inbaseidx,
363  coep + cycctr * firlen,
364  firlen, outp + outidx++);
365  cycctr++;
366  if (!(cycctr %= up))
367  inbaseidx += down;
368  if (!(outidx %= outsize))
369  return(outsize);
370  }
371  }
372  else {
373  /*
374  * rule how to convert mono routine to stereo routine:
375  * firlen, up, down and cycctr relate to samples in general,
376  * wether mono or stereo; inbaseidx, inoffset and outidx as
377  * well as insize and outsize still account for mono samples.
378  */
379  while (-1) {
380  inoffset = 2*((cycctr * down)/up);
381  if ((inbaseidx + inoffset + 2*firlen) > insize) {
382  inbaseidx -= insize - 2*firlen + 2;
383  return(outidx);
384  }
385  fir_stereo(inp + inoffset + inbaseidx,
386  coep + cycctr * firlen, firlen,
387  outp + outidx, outp + outidx + 1);
388  outidx += 2;
389  cycctr++;
390  if (!(cycctr %= up))
391  inbaseidx += 2*down;
392  if (!(outidx %= outsize))
393  return(outsize);
394  }
395  }
396 }
397 
398 /*
399  * set up coefficient array
400  */
401 static void make_coe(void)
402 {
403  int i, q;
404 
405  for (i = 0; i < g_firlen; i++) {
406  for (q = 0; q < g_up; q++) {
407  g_coep[q * g_firlen + i] = coefficient(i, q, g_firlen,
408  g_fgk, g_fgg, g_fsi, g_up, g_down, g_ampli);
409  }
410  }
411 }
412 
413 /***********************************************************************/
414 /* Serious modifications by Alan W Black (awb@cstr.ed.ac.uk) */
415 /* to interface with rest of system // deleted various io functions */
416 /* too. */
417 /***********************************************************************/
418 static WORD *inbuff = NULL;
419 static int inpos;
420 static int inmax;
421 static WORD *outbuff = NULL;
422 static int outpos;
423 static int outmax;
424 
425 static int ioerr(void)
426 {
427  delete[] g_coep;
428  return -1;
429 }
430 
431 static int gcd(int x, int y)
432 {
433  int remainder,a,b;
434 
435  if ((x < 1) || (y < 1))
436  return -1;
437 
438  for (a=x,b=y; b != 0; )
439  {
440  remainder = a % b;
441  a = b;
442  b = remainder;
443  }
444  return a;
445 }
446 
447 static int find_ratios(int in_samp_freq,int out_samp_freq,int *up,int *down)
448 {
449  // Find ratios
450  int d;
451 
452  d = gcd(in_samp_freq,out_samp_freq);
453  if (d == -1) return -1;
454  *down = in_samp_freq / d;
455  *up = out_samp_freq / d;
456 
457  if ((*up > 1024) || (*down > 1024))
458  return -1; // should try harder
459 
460  return 0;
461 }
462 
463 static int intimport(float *buff, int n)
464 {
465  /* Import n more samples from PWave into buff */
466  int i,end;
467 
468  if ((inpos+n) >= inmax)
469  end = inmax - inpos;
470  else
471  end = n;
472  for (i=0;i < end; i++)
473  buff[i] = inbuff[inpos++];
474 
475  return i;
476 }
477 
478 static int intexport(float *buff, int n)
479 {
480  /* Export n samples from buff into end of PWave */
481  int i,end;
482 
483  if ((outpos+n) >= outmax)
484  end = outmax - inpos;
485  else
486  end = n;
487  for (i=0;i < end; i++)
488  outbuff[outpos++] = (short)buff[i];
489 
490  return i;
491 }
492 
493 static int init_globs(WORD *in,int insize, WORD **out, int *outsize,
494  int in_samp_freq, int out_samp_freq)
495 {
496  int new_size;
497  g_monoflag = 1; /* always mono */
498  if (find_ratios(in_samp_freq,out_samp_freq,&g_up,&g_down) == -1)
499  return -1;
500  g_fsi = 1.0; /* ? in_samp_freq ? */
501  if (g_up > g_down)
502  { // upsampling
503  g_fgg = 0.0116;
504  g_fgk = 0.461;
505  g_firlen = (int)(162 * (float)g_up/(float)g_down);
506  }
507  else
508  { // downsampling
509  g_fgg = (float)g_up/(float)g_down * 0.0116;
510  g_fgk = (float)g_up/(float)g_down * 0.461;
511  g_firlen = (int)(162 * (float)g_down/(float)g_up);
512  }
513  if (g_firlen < 1 || g_firlen > MAXLENGTH)
514  return -1;
515  g_ampli = 0.8;
516  g_coep = new float[g_firlen * g_up];
517 
518  inpos = 0;
519  inmax = insize;
520  inbuff = in;
521  new_size = (int)(((float)out_samp_freq/(float)in_samp_freq)*
522  1.1*insize)+2000;
523  *out = new WORD[new_size];
524  outbuff = *out;
525  outmax = new_size;
526  *outsize = 0;
527  outpos = 0;
528 
529  /* For filter_on_buffers */
530  inbaseidx = 0;
531  inoffset = 0;
532  cycctr = 0;
533  outidx = 0;
534 
535  return 0;
536 }
537 
538 
539 /*
540  * External call added by Alan W Black, 4th June 1996
541  * a combination of parse args and main
542  */
543 int rateconv(short *in,int isize, short **out, int *osize,
544  int in_samp_freq, int out_samp_freq)
545 {
546  int insize = 0, outsize = 0, skirtlen;
547 
548  if (init_globs(in,isize,out,osize,in_samp_freq,out_samp_freq) == -1)
549  return -1;
550 
551  make_coe();
552  skirtlen = (g_firlen - 1) * (g_monoflag ? 1 : 2);
553  zerofill(g_sin, skirtlen);
554  do {
555  insize = intimport(g_sin + skirtlen, INBUFFSIZE - skirtlen);
556  if (insize < 0 || insize > INBUFFSIZE - skirtlen)
557  return ioerr();
558  do {
559  outsize = filtering_on_buffers(g_sin, skirtlen + insize,
560  g_sout, OUTBUFFSIZE,
561  g_coep, g_firlen, g_up, g_down,
562  g_monoflag);
563  if (outsize != OUTBUFFSIZE) {
564  transfer_int(g_sin + insize, g_sin, skirtlen);
565  break;
566  }
567  if (intexport(g_sout, outsize) != outsize)
568  return ioerr();
569  } while (-1);
570  } while (insize > 0);
571  zerofill(g_sin + skirtlen, skirtlen);
572  do {
573  outsize = filtering_on_buffers(g_sin, skirtlen + skirtlen,
574  g_sout, OUTBUFFSIZE,
575  g_coep, g_firlen, g_up, g_down,
576  g_monoflag);
577  if (intexport(g_sout, outsize) != outsize)
578  return ioerr();
579  } while (outsize == OUTBUFFSIZE);
580 
581  delete[] g_coep;
582 
583  *osize = outpos;
584 
585  /* The new signal will be offset by half firlen window so fix it */
586  memmove(*out,*out+g_firlen/4,*osize*2);
587  *osize -= g_firlen/4;
588 
589  return 0;
590 
591 }
592 
float end(const EST_Item &item)
Definition: EST_item_aux.cc:96
#define MAXLENGTH
Definition: rateconv.cc:180
#define INBUFFSIZE
Definition: rateconv.cc:184
#define sqr(a)
Definition: rateconv.cc:185
void fir_mono(float *inp, float *coep, int firlen, float *outp)
Definition: rateconv.cc:278
#define M_PI
Definition: rateconv.cc:188
NULL
Definition: EST_WFST.cc:55
getString int
Definition: EST_item_aux.cc:50
int rateconv(short *in, int isize, short **out, int *osize, int in_samp_freq, int out_samp_freq)
Definition: rateconv.cc:543
#define OUTBUFFSIZE
Definition: rateconv.cc:183
#define WORD
Definition: rateconv.cc:172