File: | modules/UniSyn/us_prosody.cc |
Location: | line 191, column 5 |
Description: | Value stored to 'm' is never read |
1 | /*************************************************************************/ |
2 | /* */ |
3 | /* Centre for Speech Technology Research */ |
4 | /* University of Edinburgh, UK */ |
5 | /* Copyright (c) 1996,1997 */ |
6 | /* All Rights Reserved. */ |
7 | /* */ |
8 | /* Permission is hereby granted, free of charge, to use and distribute */ |
9 | /* this software and its documentation without restriction, including */ |
10 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
11 | /* distribute, sublicense, and/or sell copies of this work, and to */ |
12 | /* permit persons to whom this work is furnished to do so, subject to */ |
13 | /* the following conditions: */ |
14 | /* 1. The code must retain the above copyright notice, this list of */ |
15 | /* conditions and the following disclaimer. */ |
16 | /* 2. Any modifications must be clearly marked as such. */ |
17 | /* 3. Original authors' names are not deleted. */ |
18 | /* 4. The authors' names are not used to endorse or promote products */ |
19 | /* derived from this software without specific prior written */ |
20 | /* permission. */ |
21 | /* */ |
22 | /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */ |
23 | /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ |
24 | /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ |
25 | /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */ |
26 | /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ |
27 | /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ |
28 | /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ |
29 | /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ |
30 | /* THIS SOFTWARE. */ |
31 | /* */ |
32 | /*************************************************************************/ |
33 | /* */ |
34 | /* Author: Paul Taylor */ |
35 | /* Date: 6 Jan 1998 */ |
36 | /* --------------------------------------------------------------------- */ |
37 | /* UniSyn prosody manipulation functions */ |
38 | /* */ |
39 | /*************************************************************************/ |
40 | |
41 | #include "us_synthesis.h" |
42 | #include "Phone.h" |
43 | |
44 | //static void add_end_silences(EST_Relation &segment); |
45 | //static void add_end_silences(EST_Relation &segment, EST_Relation &target); |
46 | |
47 | void pitchmarks_to_f0(EST_Track &pm, EST_Track &fz, float shift) |
48 | { |
49 | int i; |
50 | float period; |
51 | |
52 | fz.resize((int)(pm.end()/shift), 1); |
53 | fz.fill_time(shift); |
54 | |
55 | for (i = 0; i < fz.num_frames() -1 ; ++i) |
56 | { |
57 | period = get_time_frame_size(pm, pm.index_below(fz.t(i))); |
58 | fz.a(i) = 1.0 /period; |
59 | } |
60 | } |
61 | |
62 | void f0_to_pitchmarks(EST_Track &fz, EST_Track &pm, int num_channels, |
63 | float default_f0, float target_end) |
64 | { |
65 | int i; |
66 | float max = 0.0; |
67 | float fz_end; |
68 | |
69 | // Its impossible to guess the length of the pitchmark array before |
70 | // hand. Here we find the upper limit and resize at the end |
71 | for (i = 0; i < fz.num_frames(); ++i) |
72 | { |
73 | if (fz.a_no_check(i) < 0) |
74 | fz.a_no_check(i) = 0; |
75 | if (fz.a_no_check(i) > 500) |
76 | fz.a_no_check(i) = fz.a_no_check(i-1); |
77 | if (fz.a_no_check(i) > max) |
78 | max = fz.a_no_check(i); |
79 | } |
80 | |
81 | // Coefficients will also be placed in here, so its best allocate |
82 | // space for their channels now |
83 | fz_end = fz.end(); |
84 | pm.resize(int(max * (Gof(fz_end, target_end)(((fz_end) > (target_end)) ? (fz_end) : (target_end)))) + 10, num_channels); |
85 | |
86 | |
87 | int fz_len = fz.length(); |
88 | float t1 = 0.0; //first pitchmark convention |
89 | float t2; |
90 | |
91 | float f1 = fz.a_no_check(0); //arbitrary init |
92 | float f2; |
93 | |
94 | double area = 0.5; // init value |
95 | int pm_i = 0; |
96 | int pm_len = pm.length(); |
97 | for( int i=0; i<fz_len; i++ ){ |
98 | t2 = fz.t( i ); |
99 | f2 = fz.a_no_check( i ); |
100 | |
101 | float slope = (f2-f1)/(t2-t1); |
102 | area += (t2 - t1) * 0.5 * (f1 + f2); |
103 | while( (area >= 1.0) && (pm_i < pm_len) ){ |
104 | area -= 1.0; |
105 | float discriminant = f2*f2 - 2.0 * area * slope; |
106 | if (discriminant < 0.0) discriminant = 0.0; |
107 | pm.t(pm_i++) = t2 - 2.0 * area / (f2 + sqrt (discriminant)); |
108 | } |
109 | t1 = t2; |
110 | f1 = f2; |
111 | } |
112 | |
113 | float default_shift = 1.0 / default_f0; |
114 | if (target_end > fz_end) |
115 | for (; t1 < target_end; ++pm_i) |
116 | t1 = pm.t(pm_i) = t1 + default_shift; |
117 | |
118 | pm.resize(pm_i-1, num_channels); |
119 | } |
120 | |
121 | |
122 | |
123 | /* Convert an F0 contour into a set of pitchmarks. This is done by the |
124 | obvious iterative function. |
125 | |
126 | Space before the first defined F0 value is filled with regularly spaced |
127 | pitchmarks at intervals 1/def_f0. If the target_end value is |
128 | specified, more default pitchmarks are placed after the end of the |
129 | last f0 value until time target_end has been reached. |
130 | */ |
131 | |
132 | void f0_to_pitchmarks_orig(EST_Track &fz, EST_Track &pm, int num_channels, |
133 | float default_f0, float target_end) |
134 | { |
135 | int i; |
136 | float max = 0.0, prev_pm = 0.0, val; |
137 | float fz_end; |
138 | |
139 | // cout << "fz end: " << fz.end() << endl; |
140 | // cout << "fz n fg: " << fz.num_frames() << endl; |
141 | |
142 | // Its impossible to guess the length of the pitchmark array before |
143 | // hand. Here we find the upper limit and resize at the end |
144 | for (i = 0; i < fz.num_frames(); ++i) |
145 | { |
146 | if (fz.a_no_check(i) < 0) |
147 | fz.a_no_check(i) = 0; |
148 | if (fz.a_no_check(i) > 500) |
149 | fz.a_no_check(i) = fz.a_no_check(i-1); |
150 | if (fz.a_no_check(i) > max) |
151 | max = fz.a_no_check(i); |
152 | } |
153 | |
154 | // Coefficients will also be placed in here, so its best allocate |
155 | // space for their channels now |
156 | fz_end = fz.end(); |
157 | pm.resize(int(max * (Gof(fz_end, target_end)(((fz_end) > (target_end)) ? (fz_end) : (target_end)))) + 10, num_channels); |
158 | |
159 | // cout << "fz end: " << fz.end() << endl; |
160 | // cout << "fz n fg: " << fz.num_frames() << endl; |
161 | // cout << "pmn fg: " << pm.num_frames() << endl; |
162 | |
163 | for (i = 0; prev_pm < fz_end; ++i) |
164 | { |
165 | val = fz.a(prev_pm) > 0.0 ? fz.a(prev_pm) : default_f0; |
166 | pm.t(i) = prev_pm + (1.0 / val); |
167 | prev_pm = pm.t(i); |
168 | } |
169 | |
170 | if (target_end > fz_end) |
171 | for (; prev_pm < target_end; ++i) |
172 | { |
173 | pm.t(i) = prev_pm + (1.0 / default_f0); |
174 | prev_pm = pm.t(i); |
175 | } |
176 | |
177 | pm.resize(i - 1, num_channels); |
178 | } |
179 | |
180 | // not sure if this is useful |
181 | void linear_pitchmarks(EST_Track &source_pm, EST_Track &target_pm, |
182 | float start_f0, float end_f0) |
183 | { |
184 | int i; |
185 | float m, length, pitch; |
186 | target_pm.resize(source_pm.num_frames(), source_pm.num_channels()); |
187 | |
188 | length = (float)source_pm.num_frames() / (end_f0 - start_f0); |
189 | |
190 | target_pm.t(0) = 0.0; |
191 | m = (end_f0 - start_f0) / length; |
Value stored to 'm' is never read | |
192 | |
193 | for(i = 1; i < target_pm.num_frames(); ++i) |
194 | { |
195 | pitch = (((float)i / (float) target_pm.num_frames()) |
196 | * (end_f0 - start_f0)) + start_f0; |
197 | target_pm.t(i) = target_pm.t(i - 1) + (1 /pitch); |
198 | } |
199 | } |
200 | |
201 | // not sure if this is useful |
202 | void stretch_f0_time(EST_Track &f0, float stretch, |
203 | float s_last_time, float t_last_time) |
204 | { |
205 | for (int i = 0 ; i < f0.num_frames(); ++i) |
206 | { |
207 | // cout << i << " o t:" << f0.t(i) << endl; |
208 | f0.t(i) = ((f0.t(i) - s_last_time) * stretch) + t_last_time; |
209 | // cout << i << " m t:" << f0.t(i) << endl; |
210 | } |
211 | } |
212 | |
213 | // make target F0 from source F0, with same F0 values as original, |
214 | // but durations specified by target_seg. |
215 | |
216 | /* |
217 | void us_F0targets_to_pitchmarks(EST_Utterance &utt, |
218 | const EST_String &seg_relation) |
219 | { |
220 | utt.create_relation("TargetCoef"); |
221 | EST_Track *target_coef = new EST_Track; |
222 | EST_Item *end_seg; |
223 | int num_channels = 0; |
224 | float end; |
225 | |
226 | if (utt.relation_present("SourceCoef")) |
227 | { |
228 | EST_Track *source_coef = |
229 | track(utt.relation("SourceCoef")->head()->f("coefs")); |
230 | num_channels = source_coef->num_channels(); |
231 | } |
232 | |
233 | if (seg_relation == "") |
234 | end_seg = utt.relation("Segment", 1)->last(); |
235 | else |
236 | end_seg = utt.relation(seg_relation, 1)->last(); |
237 | |
238 | if (end_seg) |
239 | end = end_seg->F("end"); |
240 | else |
241 | end = 0; |
242 | |
243 | targets_to_pitchmarks(*(utt.relation("Target")), *target_coef, |
244 | num_channels,end); |
245 | |
246 | EST_Item *item = utt.relation("TargetCoef")->append(); |
247 | item->set("name", "coef"); |
248 | item->set_val("coefs",est_val(target_coef)); |
249 | |
250 | } |
251 | |
252 | void targets_to_pitchmarks(EST_Relation &targ, EST_Track &pitchmarks, |
253 | int num_channels,float end) |
254 | { |
255 | EST_Item *s; |
256 | float time, f0, prev_time, prev_f0, m, max; |
257 | int i; |
258 | |
259 | // Its impossible to guess the length of the pitchmark array before |
260 | // hand. Here we find the upper limit and resize at the end |
261 | for (max = 0.0, s = targ.first_leaf(); s; s = next_leaf(s)) |
262 | if (s->F("f0") > max) |
263 | max = s->F("f0"); |
264 | |
265 | pitchmarks.resize((int)(max * 1.1 * end)+1, num_channels); |
266 | |
267 | prev_time = 0; |
268 | prev_f0 = targ.first_leaf() ? targ.first_leaf()->F("f0") : 120; |
269 | pitchmarks.t(0) = 0.0; |
270 | |
271 | for (i = 1, s = targ.first_leaf(); s; s = next_leaf(s)) |
272 | { |
273 | time = s->f("pos"); |
274 | f0 = s->F("f0"); |
275 | |
276 | if (f0 < 30) // to protect against with duff IntTarget algorithms |
277 | continue; |
278 | if (time == prev_time) |
279 | continue; |
280 | else if (time < prev_time) |
281 | { |
282 | cerr << "UniSyn: warning target in wrong order at " << prev_time; |
283 | cerr << " ignored" << endl; |
284 | continue; |
285 | } |
286 | m = (f0 - prev_f0) / (time - prev_time); |
287 | |
288 | |
289 | { |
290 | f0 = (m * (pitchmarks.t(i - 1) - prev_time)) + prev_f0; |
291 | pitchmarks.t(i) = pitchmarks.t(i - 1) + 1.0/f0; |
292 | } |
293 | prev_time = time; |
294 | prev_f0 = f0; |
295 | } |
296 | // Ensure pitch marks go to the end of the utterance |
297 | // This will effectively mean the last half diphone will be extended over |
298 | // the whole final segment. This will only be reasonable if the |
299 | // final segment is a silence. |
300 | for (; pitchmarks.t(i - 1) < end; ++i) |
301 | pitchmarks.t(i) = pitchmarks.t(i - 1) + 1.0/prev_f0; |
302 | pitchmarks.resize(i, pitchmarks.num_channels()); |
303 | } |
304 | */ |
305 | |
306 | |
307 | /*static void add_end_silences(EST_Relation &segment, EST_Relation &target) |
308 | { |
309 | EST_Item *t, *n; |
310 | float shift = 0.0; |
311 | const float pause_duration = 0.1; |
312 | |
313 | t = segment.head(); |
314 | if (!ph_is_silence(t->f("name"))) |
315 | { |
316 | n = t->insert_before(); |
317 | n->set("name", ph_silence()); |
318 | n->set("dur", pause_duration); |
319 | shift += pause_duration; |
320 | } |
321 | |
322 | t = segment.tail(); |
323 | if (!ph_is_silence(t->f("name"))) |
324 | { |
325 | n = t->insert_after(); |
326 | n->set("name", ph_silence()); |
327 | n->set("dur", pause_duration); |
328 | shift += pause_duration; |
329 | } |
330 | dur_to_end(segment); |
331 | |
332 | target.tail()->set("pos", (target.tail()->F("pos") + shift)); |
333 | } |
334 | |
335 | void merge_pitchmarks(EST_Utterance &u, EST_Track &pm1, |
336 | EST_Track &pm2, EST_Track &target_pm, |
337 | EST_Relation &guide) |
338 | { |
339 | EST_Item *s; |
340 | float s_end, s_start; |
341 | int s_i_start, s_i_end; |
342 | int i, j = 0; |
343 | (void) u; |
344 | |
345 | target_pm.resize(1000000, 0); |
346 | s_start = 0.0; |
347 | |
348 | for (s = guide.head(); s; s = s->next()) |
349 | { |
350 | s_end = s->F("end", 1); |
351 | if (s->fI("use_pm") == 1) |
352 | { |
353 | s_i_start = pm1.index_below(s_start); |
354 | s_i_end = pm1.index_below(s_end); |
355 | for (i = s_i_start; i < s_i_end; ++i, ++j) |
356 | target_pm.t(j) = pm1.t(i); |
357 | } |
358 | else |
359 | { |
360 | s_i_start = pm2.index_below(s_start); |
361 | s_i_end = pm2.index_below(s_end); |
362 | for (i = s_i_start; i < s_i_end; ++i, ++j) |
363 | target_pm.t(j) = pm2.t(i); |
364 | } |
365 | s_start = s_end; |
366 | } |
367 | } |
368 | |
369 | void warp_f0(EST_Track &source_f0, EST_Relation &source_seg, |
370 | EST_Track &target_f0, EST_Relation &target_seg) |
371 | { |
372 | EST_Item *s, *t; |
373 | float prev_source_end = 0.0, prev_target_end = 0.0; |
374 | EST_Track part; |
375 | int frame_start, frame_end; |
376 | float stretch, t_last_time = 0, s_last_time = 0; |
377 | EST_Relation match("Match"); |
378 | EST_Item xx; |
379 | EST_Track str; |
380 | int i = 0; |
381 | |
382 | dp_match(target_seg, source_seg, match, local_cost, &xx); |
383 | |
384 | target_f0 = source_f0; |
385 | frame_start = 0; |
386 | frame_end = 0; |
387 | |
388 | str.resize(target_seg.length(), 1); |
389 | |
390 | cout << "tag: " << target_seg << endl; |
391 | |
392 | for (t = target_seg.head(); t; t = t->next()) |
393 | { |
394 | s = daughter1(t,"Match"); |
395 | if (s == 0) // ie extra phone in target specification |
396 | continue; |
397 | |
398 | frame_end = source_f0.index(s->f("end")); |
399 | if ((frame_end - frame_start) < 1) |
400 | { |
401 | cout << "Warning no frames for: " << *t << endl; |
402 | continue; |
403 | } |
404 | target_f0.sub_track(part, frame_start, (frame_end - frame_start + 1), |
405 | 0, EST_ALL); |
406 | |
407 | stretch = (t->F("end") - prev_target_end) / |
408 | (s->F("end") - prev_source_end); |
409 | |
410 | str.a(i) = stretch; |
411 | str.t(i++) = t->F("end"); |
412 | |
413 | cout << "\nstretch: " << stretch << endl; |
414 | cout << "source: " << *s << endl; |
415 | cout << "target: " << *t << endl; |
416 | cout << "frames: " << frame_start << " " << frame_end << endl; |
417 | |
418 | stretch_f0_time(part, stretch, s_last_time, t_last_time); |
419 | |
420 | prev_target_end = t->f("end"); |
421 | prev_source_end = s->f("end"); |
422 | frame_start = frame_end + 1; |
423 | t_last_time = part.end(); |
424 | s_last_time = source_f0.t(frame_end); |
425 | cout << "last time = " << s_last_time << " " << t_last_time << endl; |
426 | } |
427 | target_f0.resize(frame_end, 1); |
428 | target_f0.a(target_f0.num_frames() - 1) = 100; |
429 | str.save("zz_stretch"); |
430 | } |
431 | |
432 | void warp_pitchmarks(EST_Utterance &utt, EST_Track *source_pm, |
433 | EST_Relation &source_seg, EST_Relation &target_seg) |
434 | { |
435 | EST_Track source_f0, target_f0, *target_pm; |
436 | |
437 | target_pm = new EST_Track; |
438 | |
439 | cout << "tag: "<< target_seg << endl; |
440 | |
441 | add_end_silences(target_seg); |
442 | |
443 | |
444 | cout << "tag 2: "<< target_seg << endl; |
445 | |
446 | pitchmarks_to_f0(*source_pm, source_f0, 0.01); |
447 | |
448 | cout << "tag 3: "<< target_seg << endl; |
449 | |
450 | warp_f0(source_f0, source_seg, target_f0, target_seg); |
451 | |
452 | f0_to_pitchmarks(target_f0, *target_pm); |
453 | |
454 | utt.create_relation("TargetCoef"); |
455 | utt.create_relation("SourceSegments"); |
456 | |
457 | *utt.relation("SourceSegments") = source_seg; |
458 | |
459 | EST_Item *item = utt.relation("TargetCoef")->append(); |
460 | |
461 | target_f0.save("tt_tar.f0", "est"); |
462 | target_seg.save("tt_tar.lab"); |
463 | source_seg.save("tt_sou.lab"); |
464 | source_f0.save("tt_sou.f0", "est"); |
465 | |
466 | target_pm->save("target_coef_a.pm","est"); |
467 | item->set("name", "coefs"); |
468 | item->set_val("coefs", est_val(target_pm)); |
469 | } |
470 | |
471 | float local_cost(const EST_Item *s1, const EST_Item *s2) |
472 | { |
473 | <<<<<<< us_prosody.cc |
474 | utt.create_relation("TargetCoef"); |
475 | EST_Track *target_coef = new EST_Track; |
476 | EST_Item *end_seg; |
477 | int num_channels = 0; |
478 | float end; |
479 | |
480 | if (utt.relation_present("SourceCoef")) |
481 | { |
482 | EST_Track *source_coef = |
483 | track(utt.relation("SourceCoef")->head()->f("coefs")); |
484 | num_channels = source_coef->num_channels(); |
485 | } |
486 | ======= |
487 | float insertion_cost = get_c_int(siod_get_lval("met_insertion", NULL)); |
488 | float deletion_cost = get_c_int(siod_get_lval("met_deletion", NULL)); |
489 | float substitution_cost = |
490 | get_c_int(siod_get_lval("met_substitution", NULL)); |
491 | >>>>>>> 1.14 |
492 | |
493 | EST_String null_sym = "nil"; |
494 | |
495 | // otherwise cost is either insertion cost, or cost_matrix value |
496 | if (s1->name() == s2->name()) |
497 | return 0; |
498 | else |
499 | { |
500 | if (s1->name() == null_sym) |
501 | return insertion_cost; |
502 | else if (s2->name() == null_sym) |
503 | return deletion_cost; |
504 | else |
505 | return substitution_cost; |
506 | } |
507 | } |
508 | typedef |
509 | float (*local_cost_function)(const EST_Item *item1, |
510 | const EST_Item *item2); |
511 | |
512 | bool dp_match(const EST_Relation &lexical, |
513 | const EST_Relation &surface, |
514 | EST_Relation &match, |
515 | local_cost_function lcf, |
516 | EST_Item *null_syl); |
517 | |
518 | |
519 | |
520 | */ |
521 | |
522 | /*static void add_end_silences(EST_Relation &segment) |
523 | { |
524 | EST_Item *t, *n; |
525 | |
526 | t = segment.head(); |
527 | if (!ph_is_silence(t->f("name"))) |
528 | { |
529 | n = t->insert_before(); |
530 | n->set("name", ph_silence()); |
531 | } |
532 | |
533 | t = segment.tail(); |
534 | if (!ph_is_silence(t->f("name"))) |
535 | { |
536 | n = t->insert_after(); |
537 | n->set("name", ph_silence()); |
538 | } |
539 | } |
540 | |
541 | */ |