Bug Summary

File: modules/UniSyn/us_unit.cc
Location: line 104, column 5
Description: Value stored to 'print_centre' is never read

Annotated Source Code

1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996,1997 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* */
34/* Author: Paul Taylor */
35/* Date: 6 Jan 1998 */
36/* --------------------------------------------------------------------- */
37/* Acoustic Unit Concatenation */
38/* */
39/*************************************************************************/
40
41
42#include "siod.h"
43#include "EST_sigpr.h"
44#include "EST_wave_aux.h"
45#include "EST_track_aux.h"
46#include "EST_ling_class.h"
47#include "us_synthesis.h"
48#include <cmath>
49#include "Phone.h"
50
51using namespace std;
52
53void merge_features(EST_Item *from, EST_Item *to, int keep_id);
54
55void dp_time_align(EST_Utterance &utt, const EST_String &source_name,
56 const EST_String &target_name,
57 const EST_String &time_name,
58 bool do_start);
59
60void concatenate_unit_coefs(EST_Relation &unit_stream, EST_Track &source_lpc);
61void us_unit_raw_concat(EST_Utterance &utt);
62
63void window_units(EST_Relation &unit_stream,
64 EST_TVector<EST_Wave> &frames,
65 float window_factor,
66 EST_String window_name,
67 bool window_symmetric,
68 EST_IVector *pm_indices=0);
69
70bool dp_match(const EST_Relation &lexical,
71 const EST_Relation &surface,
72 EST_Relation &match,
73 float ins, float del, float sub);
74
75void map_match_times(EST_Relation &target, const EST_String &match_name,
76 const EST_String &time_name, bool do_start);
77
78
79static void window_frame(EST_Wave &frame, EST_Wave &whole, float scale,
80 int start, int end, EST_WindowFunc *window_function,
81 int centre_index=-1)
82{
83 int i, j, send;
84 EST_TBuffer<float> window;
85 int window_length = (end-start)+1;
86
87 if (frame.num_samples() != (window_length))
88 frame.resize(window_length);
89 frame.set_sample_rate(whole.sample_rate());
90 // Ensure we have a safe end
91 if (end < whole.num_samples())
92 send = end;
93 else
94 send = whole.num_samples();
95
96
97 int print_centre;
98 if ( centre_index < 0 ){
99 window_function( window_length, window, -1 );
100 print_centre = (window_length-1)/2+start;
101 }
102 else{
103 window_function( window_length, window, (centre_index-start));
104 print_centre = centre_index;
Value stored to 'print_centre' is never read
105 }
106
107
108#if defined(EST_DEBUGGING)
109 cerr << "(start centre end window_length wholewavelen) "
110 << start << " "
111 << print_centre << " "
112 << end << " "
113 << window_length << " "
114 << whole.num_samples() << endl;
115#endif
116
117
118 // To allow a_no_check access we do this in three stages
119 for (i = 0, j = start; j < 0; ++i, ++j)
120 frame.a_no_check(i) = 0;
121 for ( ; j < send; ++i, ++j)
122 frame.a_no_check(i) = (int)((float)whole.a_no_check(j) * window(i) * scale);
123 for ( ; j < end; ++j,++i)
124 frame.a_no_check(i) = 0;
125
126
127#if defined(EST_DEBUGGING)
128 // It's not always very nice to resynthesise speech from
129 // inserted zeros! These checks should alert the user (me ;)
130 if( start<0 )
131 EST_warning(EST_error_where = __null), (*EST_warning_func)( "padded start of pitch period with zeros (index %d)", i );
132
133 if( end>whole.num_samples() )
134 EST_warning(EST_error_where = __null), (*EST_warning_func)( "padded end of pitch period with zeros (frame %d)", i );
135#endif
136}
137
138
139// The window_signal function has been changed in several ways:
140//
141// *) The function now has an asymmetric window mode.
142//
143// In this mode, asymmetric windows are used from pitchmark at t-1
144// to pitchmark at time t+1, with the maximum value of 1.0 at
145// pitchmark at time t.
146//
147// *) In the original symmetric mode:
148//
149// The first change is to ensure the window frames always have an
150// odd number of samples (a convention for how to handle rounding
151// problems when converting from times (float) to sample numbers
152// (int)). The centre sample corresponds to the pitch mark time.
153//
154// The second change is that the estimate of local pitch period is
155// always based in current and *previous* pitchmark. In the case
156// of the first pitch mark in track pm, the previous pitchmark is
157// assumed to be at zero time. Hopefully, this won't break much.
158// However, if this convention is not used everywhere else that
159// it's needed and some things break, then arguably those
160// things need to be fixed to adhere to this same convention...
161void window_signal(EST_Wave &sig, EST_Track &pm,
162 EST_WaveVector &frames, int &i, float scale,
163 float window_factor,
164 EST_WindowFunc *window_function,
165 bool window_symmetric,
166 EST_IVector *pm_indices=0)
167{
168 float first_pos, period=0.0;
169 float prev_pm, current_pm;
170 int first_sample, centre_sample, last_sample;
171 int sample_rate = sig.sample_rate();
172 int pm_num_frames = pm.num_frames();
173
174 // estimate first period as pitchmark time itself (i.e. assume a previous
175 // pitchmark at 0.0 time, waveform sample 0)
176 prev_pm = 0.0;
177
178
179 if( window_symmetric )
180 {
181 if (pm_num_frames < 1 )
182 EST_error(EST_error_where = __null), (*EST_error_func)( "Attempted to Window around less than 1 pitchmark" );
183
184 for( int j=0; j<pm_num_frames; ++j, ++i ){
185 current_pm = pm.t(j);
186 period = current_pm - prev_pm;
187 centre_sample = (int)rint( current_pm*(float)sample_rate );
188
189 first_pos = prev_pm - (period * (window_factor-1.0));
190 first_sample = (int)rint( first_pos*(float)sample_rate );
191
192 last_sample = (2*centre_sample)-first_sample;
193
194 window_frame(frames[i], sig, scale, first_sample, last_sample, window_function);
195
196 prev_pm = current_pm;
197 }
198 }
199 else{
200 if( pm_indices == 0 )
201 EST_error(EST_error_where = __null), (*EST_error_func)( "required pitchmark indices EST_IVector is null" );
202
203 int j;
204
205 // Rob's experiment to see if we can handle small bits of speech with no pitchmarks.
206 // We just 0 the frames in this case.
207
208 if (pm_num_frames < 1 )
209 {
210 EST_warning(EST_error_where = __null), (*EST_warning_func)( "Attempted to Window around less than 1 pitchmark" );
211 }
212 else
213 {
214 for( j=0; j<pm_num_frames-1; ++j, ++i ){
215 current_pm = pm.t(j);
216 period = current_pm - prev_pm;
217 centre_sample = (int)rint( current_pm*(float)sample_rate );
218
219 first_pos = prev_pm - (period * (window_factor-1.0));
220 first_sample = (int)rint( first_pos*(float)sample_rate );
221
222 float next_pm = pm.t(j+1);
223 float last_pos = next_pm + ((next_pm-current_pm)*(window_factor-1.0));
224 last_sample = (int)rint( last_pos*(float)sample_rate );
225
226 window_frame(frames[i], sig, scale, first_sample,
227 last_sample, window_function, centre_sample);
228 (*pm_indices)[i] = centre_sample - first_sample;
229
230 prev_pm = current_pm;
231 }
232
233 //last frame window size is set according to pm.t(end) and the number
234 //of samples in the waveform (it is presumed the waveform begins at the
235 //preceeding pitchmark and ends at the pitchmark following the current
236 //unit...)
237
238 current_pm = pm.t(j);
239 centre_sample = (int)rint( current_pm*(float)sample_rate );
240 first_pos = prev_pm - (period * (window_factor-1.0));
241 first_sample = (int)rint( first_pos*(float)sample_rate );
242 last_sample = sig.num_samples()-1;
243 window_frame(frames[i], sig, scale, first_sample,
244 last_sample, window_function);
245 (*pm_indices)[i] = centre_sample - first_sample;
246
247#if defined(EST_DEBUGGING)
248 cerr << "changed: " << i << " " << pm_indices->n() << endl;
249#endif
250
251 ++i;
252 }
253 }
254}
255
256void window_units( EST_Relation &unit_stream,
257 EST_TVector<EST_Wave> &frames,
258 float window_factor,
259 EST_String window_name,
260 bool window_symmetric,
261 EST_IVector *pm_indices )
262{
263 int i;
264 EST_Wave *sig;
265 EST_Item *u;
266 EST_Track *coefs;
267 int num = 0;
268 float scale;
269 EST_WindowFunc *window_function;
270
271 for (u = unit_stream.head(); u; u = u->next())
272 num += track(u->f("coefs"))->num_frames();
273 frames.resize(num);
274
275 if( pm_indices != 0 )
276 pm_indices->resize(num);
277
278 if (window_name == "")
279 window_name = "hanning";
280
281 window_function = EST_Window::creator(window_name);
282
283 for (i = 0, u = unit_stream.head(); u; u = u->next())
284 {
285 sig = wave(u->f("sig"));
286 coefs = track(u->f("coefs"));
287 scale = (u->f_present("scale") ? u->F("scale") : 1.0);
288
289 window_signal(*sig, *coefs, frames, i, scale, window_factor,
290 window_function, window_symmetric, pm_indices);
291 }
292}
293
294
295void us_unit_concat(EST_Utterance &utt, float window_factor,
296 const EST_String &window_name,
297 bool no_waveform=false,
298 bool window_symmetric=true)
299
300{
301 EST_Relation *unit_stream;
302 EST_Track *source_coef = new EST_Track;
303 EST_WaveVector *frames = new EST_WaveVector;
304 EST_IVector *pm_indices = 0;
305
306 unit_stream = utt.relation("Unit", 1);
307
308 concatenate_unit_coefs(*unit_stream, *source_coef);
309
310 utt.create_relation("SourceCoef");
311 EST_Item *item = utt.relation("SourceCoef")->append();
312 item->set("name", "coef");
313 item->set_val("coefs", est_val(source_coef));
314
315 if (!no_waveform){
316 if( !window_symmetric )
317 pm_indices = new EST_IVector;
318
319 window_units(*unit_stream, *frames,
320 window_factor, window_name, window_symmetric, pm_indices);
321
322 item->set_val("frame", est_val(frames));
323
324 if( !window_symmetric )
325 item->set_val("pm_indices", est_val(pm_indices));
326 }
327}
328
329
330void us_get_copy_wave(EST_Utterance &utt, EST_Wave &source_sig,
331 EST_Track &source_coefs, EST_Relation &source_seg)
332{
333 EST_Item *s, *n;
334
335 if (!utt.relation_present("Segment"))
336 EST_error(EST_error_where = __null), (*EST_error_func)("utterance must have \"Segment\" relation\n");
337
338 utt.create_relation("TmpSegment");
339
340 for (s = source_seg.head(); s; s = s->next())
341 {
342 n = utt.relation("TmpSegment")->append();
343 merge_features(n, s, 0);
344 }
345
346 utt.relation("Segment")->remove_item_feature("source_end");
347
348 dp_time_align(utt, "TmpSegment", "Segment", "source_", 0);
349
350 utt.create_relation("Unit");
351 EST_Item *d = utt.relation("Unit")->append();
352
353
354 EST_Wave *ss = new EST_Wave;
355 *ss = source_sig;
356
357 EST_Track *c = new EST_Track;
358 *c = source_coefs;
359
360 d->set_val("sig", est_val(ss));
361 d->set_val("coefs", est_val(c));
362
363 utt.remove_relation("TmpSegment");
364}
365
366
367void us_energy_normalise(EST_Relation &unit)
368{
369 EST_Wave *sig;
370
371 for (EST_Item *s = unit.head(); s; s = s->next())
372 {
373 sig = wave(s->f("sig"));
374 if (s->f_present("energy_factor"))
375 sig->rescale(s->F("energy_factor"));
376 }
377}
378
379void us_unit_raw_concat(EST_Utterance &utt)
380{
381 EST_Wave *sig, *unit_sig;
382 EST_Track *unit_coefs=0;
383 float window_factor;
384 int i, j, k;
385 int first_pm, last_pm, last_length;
386 float first_pos, last_pos;
387
388 window_factor = get_c_float(siod_get_lval("window_factor",
389 "UniSyn: no window_factor"));
390 sig = new EST_Wave;
391
392 sig->resize(1000000);
393 sig->fill(0);
394 j = 0;
395
396 for (EST_Item *s = utt.relation("Unit", 1)->head(); s; s = s->next())
397 {
398 unit_sig = wave(s->f("sig"));
399 unit_coefs = track(s->f("coefs"));
400
401 first_pos = unit_coefs->t(1);
402 first_pm = (int)(first_pos * (float)unit_sig->sample_rate());
403
404 last_pos = unit_coefs->t(unit_coefs->num_frames()-2);
405 last_pm = (int)(last_pos * (float)unit_sig->sample_rate());
406 last_length = unit_sig->num_samples() - last_pm;
407
408// std::cout << "first pm: " << first_pm << endl;
409// std::cout << "last pm: " << last_pm << endl;
410// std::cout << "last length: " << last_length << endl;
411
412 j -= first_pm;
413
414 for (i = 0; i < first_pm; ++i, ++j)
415 sig->a_safe(j) += (short)((((float) i)/ (float)first_pm) *(float)unit_sig->a_safe(i)+0.5);
416
417 for (; i < last_pm; ++i, ++j)
418 sig->a(j) = unit_sig->a(i);
419
420 for (k = 0; i < unit_sig->num_samples(); ++i, ++j, ++k)
421 sig->a_safe(j) += (short)((1.0 - (((float) k) / (float) last_length))
422 * (float)unit_sig->a_safe(i) + 0.5);
423
424// j -= last_length;
425// j += 2000;
426 }
427
428 sig->resize(j);
429 sig->set_sample_rate(16000);
430
431 add_wave_to_utterance(utt, *sig, "Wave");
432}
433
434
435void concatenate_unit_coefs(EST_Relation &unit_stream, EST_Track &source_lpc)
436{
437 int num_source_frames = 0;
438 int num_source_channels = 0;;
439 float prev_time, abs_offset, rel_offset, period, offset;
440 int i, j, k, l;
441 EST_Track *coefs;
442
443 EST_Item *u = unit_stream.head();
444 if( u == 0 ){
445 //sometimes we are just asked to synthesise empty utterances, and
446 //code elsewhere wants us to continue...
447 source_lpc.resize(0,0);
448 }
449 else{
450 EST_Track *t = 0;
451 for ( ; u; u = u->next())
452 {
453 t = track(u->f("coefs"));
454 num_source_frames += t->num_frames();
455 }
456
457 num_source_channels = t->num_channels();
458
459 source_lpc.resize(num_source_frames, num_source_channels);
460 source_lpc.copy_setup(*t);
461
462 prev_time = 0.0;
463 // copy basic information
464 for (i = 0, l = 0, u = unit_stream.head(); u; u = u->next())
465 {
466 coefs = track(u->f("coefs"));
467
468 for (j = 0; j < coefs->num_frames(); ++j, ++i)
469 {
470 for (k = 0; k < coefs->num_channels(); ++k)
471 source_lpc.a_no_check(i, k) = coefs->a_no_check(j, k);
472 source_lpc.t(i) = coefs->t(j) + prev_time;
473 }
474
475 prev_time = source_lpc.t(i - 1);
476 u->set("end", prev_time);
477 u->set("num_frames", coefs->num_frames());
478 }
479 }
480
481 // adjust pitchmarks
482 abs_offset = 0.0;
483 rel_offset = 0.0;
484 // absolute offset in seconds
485 abs_offset = get_c_float(siod_get_lval("us_abs_offset", "zz"));
486 // relative offset as a function of local pitch period
487 rel_offset = get_c_float(siod_get_lval("us_rel_offset", "zz"));
488
489 if( abs_offset!=0.0 || rel_offset!=0.0 ){
490 std::cerr << "Adjusting pitchmarks" << std::endl;
491 for (i = 0; i < source_lpc.num_frames(); ++i){
492 period = get_time_frame_size(source_lpc, (i));
493 offset = abs_offset + (rel_offset * period);
494 source_lpc.t(i) = source_lpc.t(i) + offset;
495 }
496 }
497}
498
499// jointimes specifies centre of last pitch period in each
500// concatenated unit
501// void us_linear_smooth_amplitude( EST_Wave *w,
502// const EST_Track &pm,
503// const EST_FVector &jointimes)
504// {
505// int num_joins = jointimes.length();
506
507// EST_Track *factor_contour = new EST_Track( num_joins );
508
509// for( int i=0; i<num_joins; ++i ){
510// float join_t = jointimes(i);
511// int join_indx = pm.index_below( join_t );
512
513// // estimate local short-time energy function either side of join
514// int left_start = rount(pm.t(join_indx-2)*(float)16000);
515// int left_end = rount(pm.t(join_indx)*(float)16000);
516// float left_power = 0.0 ;
517// for( int j=left_start; j<left_end; ++j )
518// left_power += pow( w[j], 2 );
519
520// left_power /= (left_end - left_start); //normalise for frame length
521
522// int right_start = rount(pm.t(join_indx+1)*(float)16000);
523// int right_end = rount(pm.t(join_indx+3)*(float)16000);
524// float right_power = 0.0;
525// for( int j=right_start; j<right_end; ++j )
526// right_power += pow( w[j], 2 );
527
528// right_power /= (right_end - right_start); //normalise for frame length
529
530// float mean_power = (left_power+right_power)/2.0;
531
532// float left_factor = left_power/mean_power;
533// float right_factor = right_power/mean_power;
534
535// (*factor_contour)[i] = left_factor;
536// (*factor_contour)[i+1] = right_factor;
537// }
538
539// }
540
541static EST_Track* us_pitch_period_energy_contour( const EST_WaveVector &pp,
542 const EST_Track &pm )
543{
544 const int pp_length = pp.length();
545
546 EST_Track *contour = new EST_Track;
547 contour->resize( pp_length, 1 );
548
549 for( int i=0; i<pp_length; ++i ){
550 const EST_Wave &frame = pp(i);
551 const int frame_length = frame.length();
552
553 // RMSE for EST_Wave window
554 int j;
555 for( contour->a_no_check(i,0) = 0.0, j=0; j<frame_length; ++j )
556 contour->a_no_check( i, 0 ) += pow( float(frame.a_no_check( j )), float(2.0) );
557
558 contour->a_no_check(i,0) = sqrt( contour->a_no_check(i,0) / (float)j );
559 contour->t(i) = pm.t(i);
560 }
561
562 return contour;
563}
564
565EST_Val ffeature(EST_Item *item,const EST_String &fname);
566
567void us_linear_smooth_amplitude( EST_Utterance *utt )
568{
569 EST_WaveVector *pp = wavevector(utt->relation("SourceCoef")->first()->f("frame"));
570 EST_Track *pm = track(utt->relation("SourceCoef")->first()->f("coefs"));
571
572 EST_Track *energy = us_pitch_period_energy_contour( *pp, *pm );
573 energy->save( "./energy_track.est", "est" );
574
575 FILE *ofile = fopen( "./join_times.est", "w" );
576 EST_Relation *units = utt->relation("Unit");
577 for( EST_Item *u=units->head(); u; u=u->next() ){
578
579 EST_Item *diphone_left = u;
580 // EST_Item *diphone_right = u->next();
581
582 fprintf( ofile, "%s\t%f\n", diphone_left->S("name").str(), diphone_left->F("end"));
583
584 EST_Item *join_phone_left = item(diphone_left->f("ph1"))->next();
585 EST_String phone_name = join_phone_left->S("name");
586 if( ph_is_sonorant( phone_name ) && !ph_is_silence( phone_name )){
587
588 //if( (ffeature(join_phone_left, "ph_vc")).S() == "+"){ // ideally for sonorants
589
590 std::cerr << "smoothing phone " << join_phone_left->S("name") << std::endl;
591
592 // EST_Item *join_phone_right = item(diphone_right->f("ph1"));
593
594 int left_end_index = energy->index(diphone_left->F("end"));
595 int right_start_index = left_end_index + 1;
596 float left_power = energy->a(left_end_index,0);
597 float right_power = energy->a(right_start_index,0);
598
599 float mean_power = (left_power+right_power)/2.0;
600 float left_factor = left_power/mean_power;
601 float right_factor = right_power/mean_power;
602
603 int smooth_start_index = left_end_index-5;
604 int smooth_end_index = right_start_index+5;
605
606
607 // rescale left pitch periods
608 float factor = 1.0;
609 float factor_incr = (left_factor-1.0)/(float)(left_end_index - smooth_start_index);
610 for( int i=smooth_start_index; i<=left_end_index; ++i, factor+=factor_incr ){
611 (*pp)[i].rescale( factor, 0 );
612 std::cerr << "rescaled frame " << i << "(factor " << factor << ")" << std::endl;
613 }
614
615 // rescale right pitch periods
616 factor = right_factor;
617 factor_incr = (1.0-right_factor)/(float)(smooth_end_index-right_start_index);
618 for( int i=right_start_index; i<=smooth_end_index; ++i, factor+=factor_incr){
619 (*pp)[i].rescale( factor, 0 );
620 std::cerr << "rescaled frame " << i << "(factor " << factor << ")" << std::endl;
621 }
622 }
623 else
624 std::cerr << "no smoothing for " << join_phone_left->S("name") << std::endl;
625
626 std::cerr << std::endl;
627 }
628
629 fclose( ofile );
630 delete energy;
631}
632