File: | modules/clunits/clunits.cc |
Location: | line 440, column 5 |
Description: | Value stored to 'bbb' is never read |
1 | /*************************************************************************/ |
2 | /* */ |
3 | /* Carnegie Mellon University and */ |
4 | /* Centre for Speech Technology Research */ |
5 | /* University of Edinburgh, UK */ |
6 | /* Copyright (c) 1998-2001 */ |
7 | /* All Rights Reserved. */ |
8 | /* */ |
9 | /* Permission is hereby granted, free of charge, to use and distribute */ |
10 | /* this software and its documentation without restriction, including */ |
11 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
12 | /* distribute, sublicense, and/or sell copies of this work, and to */ |
13 | /* permit persons to whom this work is furnished to do so, subject to */ |
14 | /* the following conditions: */ |
15 | /* 1. The code must retain the above copyright notice, this list of */ |
16 | /* conditions and the following disclaimer. */ |
17 | /* 2. Any modifications must be clearly marked as such. */ |
18 | /* 3. Original authors' names are not deleted. */ |
19 | /* 4. The authors' names are not used to endorse or promote products */ |
20 | /* derived from this software without specific prior written */ |
21 | /* permission. */ |
22 | /* */ |
23 | /* THE UNIVERSITY OF EDINBURGH, CARNEGIE MELLON UNIVERSITY AND THE */ |
24 | /* CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH REGARD TO */ |
25 | /* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY */ |
26 | /* AND FITNESS, IN NO EVENT SHALL THE UNIVERSITY OF EDINBURGH, CARNEGIE */ |
27 | /* MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, */ |
28 | /* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER */ |
29 | /* RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION */ |
30 | /* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF */ |
31 | /* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ |
32 | /* */ |
33 | /*************************************************************************/ |
34 | /* Author : Alan W Black */ |
35 | /* Date : April 1998 */ |
36 | /*-----------------------------------------------------------------------*/ |
37 | /* */ |
38 | /* Yet another unit selection method. */ |
39 | /* */ |
40 | /* Using an acoustic measure find the distance between all units in the */ |
41 | /* db. Try to minimise the mean difference between units in a cluster */ |
42 | /* using CART technology, based on features like phonetic and prosodic */ |
43 | /* context. This gives a bunch of CARTs for each unit type in the db */ |
44 | /* which are acoustically close. Use these as candidates and optimise */ |
45 | /* a path through them minimising join using a viterbi search. */ |
46 | /* */ |
47 | /* Advantages: */ |
48 | /* requires little or no measurements at selection time */ |
49 | /* allows for clear method of pruning */ |
50 | /* no weights need to be generated (well, except where they do) */ |
51 | /* will optimise appropriately with varying numbers of example units */ |
52 | /* */ |
53 | /* Disadvantages: */ |
54 | /* Units can't cross between clusters */ |
55 | /* */ |
56 | /* Implementation of Black, A. and Taylor, P. (1997). Automatically */ |
57 | /* clustering similar units for unit selection in speech synthesis */ |
58 | /* Proceedings of Eurospeech 97, vol2 pp 601-604, Rhodes, Greece. */ |
59 | /* */ |
60 | /* postscript: http://www.cs.cmu.edu/~awb/papers/ES97units.ps */ |
61 | /* http://www.cs.cmu.edu/~awb/papers/ES97units/ES97units.html */ |
62 | /* */ |
63 | /* Comments: */ |
64 | /* */ |
65 | /* This is a new implementation using the newer unit selection/signal */ |
66 | /* processing architecture in festival */ |
67 | /* */ |
68 | /* This is still in development but has become more stable. It is robust */ |
69 | /* for many cases, though a lot depends on the db and parameters */ |
70 | /* you use */ |
71 | /* */ |
72 | /* This had significant new work (and bug fixes) done on it when awb */ |
73 | /* moved to CMU */ |
74 | /* */ |
75 | /*=======================================================================*/ |
76 | #include <cstdlib> |
77 | #include "EST_math.h" |
78 | #include "festival.h" |
79 | #include "clunits.h" |
80 | |
81 | using namespace std; |
82 | |
83 | static EST_String static_unit_prev_move = "unit_prev_move"; |
84 | static EST_String static_unit_this_move = "unit_this_move"; |
85 | static EST_String static_jscore = "local_join_cost"; |
86 | static EST_String static_tscore = "local_target_cost"; |
87 | static EST_String static_cscore = "cummulative_unit_score"; |
88 | |
89 | static void setup_clunits_params(); |
90 | static EST_VTCandidate *TS_candlist(EST_Item *s,EST_Features &f); |
91 | static EST_VTPath *TS_npath(EST_VTPath *p,EST_VTCandidate *c,EST_Features &f); |
92 | static float naive_join_cost(CLunit *unit0, CLunit *unit1, |
93 | EST_Item *s, |
94 | float &u0_move, |
95 | float &u1_move); |
96 | static float optimal_couple(CLunit *u0, |
97 | CLunit *u1, |
98 | float &u0_move, |
99 | float &u1_move, |
100 | int type, |
101 | float different_prev_pen, |
102 | float non_consecutive_pen); |
103 | static void cl_parse_diphone_times(EST_Relation &diphone_stream, |
104 | EST_Relation &source_lab); |
105 | |
106 | VAL_REGISTER_CLASS_NODEL(vtcand,EST_VTCandidate)val_type val_type_vtcand="vtcand"; class EST_VTCandidate *vtcand (const EST_Val &v) { if (v.type() == val_type_vtcand) return (class EST_VTCandidate *)v.internal_ptr(); else (EST_error_where = __null), (*EST_error_func)("val not of type val_type_""vtcand" ); return __null; } static void val_delete_vtcand(void *v) { ( void)v; } EST_Val est_val(const class EST_VTCandidate *v) { return EST_Val(val_type_vtcand, (void *)v,val_delete_vtcand); }; |
107 | VAL_REGISTER_CLASS_NODEL(clunit,CLunit)val_type val_type_clunit="clunit"; class CLunit *clunit(const EST_Val &v) { if (v.type() == val_type_clunit) return (class CLunit *)v.internal_ptr(); else (EST_error_where = __null), ( *EST_error_func)("val not of type val_type_""clunit"); return __null; } static void val_delete_clunit(void *v) { (void)v; } EST_Val est_val(const class CLunit *v) { return EST_Val(val_type_clunit , (void *)v,val_delete_clunit); }; |
108 | |
109 | LISP selection_trees = NIL((struct obj *) 0); |
110 | LISP clunits_params = NIL((struct obj *) 0); |
111 | static int optimal_coupling = 0; |
112 | static int extend_selections = 0; |
113 | static int clunits_debug = 0; |
114 | static int clunits_log_scores = 0; |
115 | static int clunits_smooth_frames = 0; |
116 | float continuity_weight = 1; |
117 | float f0_join_weight = 0.0; |
118 | float different_prev_pen = 1000.0; |
119 | float non_consecutive_pen = 100.0; |
120 | static EST_String clunit_name_feat = "name"; |
121 | |
122 | static CLDB *cldb; |
123 | |
124 | static LISP clunits_select(LISP utt) |
125 | { |
126 | // Select units from db using CARTs to index into clustered unit groups |
127 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); |
128 | EST_Item *s, *f; |
129 | |
130 | cldb = check_cldb(); // make sure there is one loaded |
131 | setup_clunits_params(); |
132 | |
133 | f = u->relation("Segment")->head(); |
134 | for (s=f; s; s=s->next()) |
135 | s->set_val("clunit_name",ffeature(s,clunit_name_feat)); |
136 | |
137 | if (f) |
138 | { |
139 | EST_Viterbi_Decoder v(TS_candlist,TS_npath,-1); |
140 | v.set_big_is_good(FALSE(1==0)); // big is bad |
141 | |
142 | v.initialise(u->relation("Segment")); |
143 | v.search(); |
144 | if (!v.result("unit_id")) |
145 | { |
146 | cerr << "CLUNIT: failed to find path\n"; |
147 | return utt; |
148 | } |
149 | v.copy_feature(static_unit_this_move); |
150 | v.copy_feature(static_unit_prev_move); |
151 | v.copy_feature(static_jscore); |
152 | v.copy_feature(static_tscore); |
153 | v.copy_feature(static_cscore); |
154 | } |
155 | |
156 | return utt; |
157 | } |
158 | |
159 | static LISP clunits_get_units(LISP utt) |
160 | { |
161 | // Create unit stream and loading params |
162 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); |
163 | EST_Relation *units,*ss; |
164 | EST_Item *s; |
165 | |
166 | cldb = check_cldb(); // make sure there is one loaded |
167 | |
168 | units = u->create_relation("Unit"); |
169 | for (s=u->relation("Segment")->head(); s != 0; s=s->next()) |
170 | { |
171 | EST_Item *unit = units->append(); |
172 | CLunit *db_unit = clunit(s->f("unit_id")); |
173 | float st,e; |
174 | unit->set_name(db_unit->name); |
175 | unit->set("fileid",db_unit->fileid); |
176 | // These should be modified from the optimal coupling |
177 | if ((s->prev()) && (s->f_present("unit_this_move"))) |
178 | st = s->F("unit_this_move"); |
179 | else |
180 | st = db_unit->start; |
181 | if (s->next() && (s->next()->f_present("unit_prev_move"))) |
182 | e = s->next()->F("unit_prev_move"); |
183 | else |
184 | e = db_unit->end; |
185 | if ((e-st) < 0.011) |
186 | e = st + 0.011; |
187 | unit->set("start",st); |
188 | unit->set("middle",db_unit->start); |
189 | unit->set("end",e); |
190 | unit->set("unit_start",st); |
191 | unit->set("unit_middle",db_unit->start); |
192 | unit->set("unit_end",e); |
193 | unit->set("seg_start",db_unit->start); |
194 | unit->set("seg_end",db_unit->end); |
195 | cldb->load_coefs_sig(unit); |
196 | if (clunits_debug) |
197 | printf("unit: %s fileid %s start %f end %f\n", |
198 | (const char *)db_unit->name, |
199 | (const char *)db_unit->fileid, |
200 | st,e); |
201 | } |
202 | |
203 | // Make it look as much like the diphones as possible for |
204 | // the rest of the code |
205 | ss = u->create_relation("SourceSegments"); |
206 | for (s = u->relation("Segment")->head(); s != 0 ; s = s->next()) |
207 | { |
208 | EST_Item *d = ss->append(); |
209 | d->set_name(ffeature(s,"clunit_name")); |
210 | } |
211 | |
212 | cl_parse_diphone_times(*units,*ss); |
213 | |
214 | return utt; |
215 | } |
216 | |
217 | static void cl_parse_diphone_times(EST_Relation &diphone_stream, |
218 | EST_Relation &source_lab) |
219 | { |
220 | EST_Item *s, *u; |
221 | EST_Track *pm; |
222 | int e_frame, m_frame = 0; |
223 | float dur_1 = 0.0, dur_2 = 0.0, p_time; |
224 | float t_time = 0.0, end; |
225 | p_time = 0.0; |
226 | |
227 | for (s = source_lab.head(), u = diphone_stream.head(); u; u = u->next(), |
228 | s = s->next()) |
229 | { |
230 | pm = track(u->f("coefs")); |
231 | if (pm == 0) |
232 | { |
233 | cerr << "CLUNIT: couldn't get pitchmarks for " << u->name() << endl; |
234 | festival_error()(errjmp_ok ? longjmp(*est_errjmp,1) : festival_tidy_up(),exit (-1)); |
235 | } |
236 | |
237 | e_frame = pm->num_frames() - 1; |
238 | m_frame = u->I("middle_frame"); |
239 | |
240 | dur_1 = pm->t(m_frame); |
241 | dur_2 = pm->t(e_frame) - dur_1; |
242 | |
243 | s->set("end", (dur_1 + p_time)); |
244 | p_time = s->F("end") + dur_2; |
245 | |
246 | end = dur_1 + dur_2 + t_time; |
247 | t_time = end; |
248 | u->set("end", t_time); |
249 | } |
250 | if (s) |
251 | s->set("end", (dur_2 + p_time)); |
252 | } |
253 | |
254 | static LISP clunits_simple_wave(LISP utt) |
255 | { |
256 | // Naive joining of waveforms |
257 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); |
258 | EST_Wave *w = new EST_Wave; |
259 | EST_Wave *w1 = 0; |
260 | EST_Item *witem = 0; |
261 | EST_Item *s; |
262 | int size,i,k,c; |
263 | |
264 | for (size=0,s=u->relation("Unit")->head(); s != 0; s = s->next()) |
265 | size += wave(s->f("sig"))->num_samples(); |
266 | |
267 | if (u->relation("Unit")->head()) |
268 | { // This will copy the necessary wave features across |
269 | s = u->relation("Unit")->head(); |
270 | *w = *(wave(s->f("sig"))); |
271 | } |
272 | i = w->num_samples(); |
273 | w->resize(size); // its maximum size |
274 | for (s=u->relation("Unit")->head()->next(); s; s=s->next()) |
275 | { |
276 | w1 = wave(s->f("sig")); |
277 | // Find last zero crossing |
278 | for (c=0; ((i > 0) && (c < 40)); c++,i--) |
279 | if (((w->a_no_check(i) < 0) && (w->a_no_check(i-1) >= 0)) || |
280 | ((w->a_no_check(i) >= 0) && (w->a_no_check(i-1) < 0))) |
281 | break; |
282 | if (c == 40) i += 40; |
283 | // Find next zero crossing |
284 | for (c=0,k=1; ((k < w1->num_samples()) && (c < 40)); k++,i++) |
285 | if (((w1->a_no_check(k) < 0) && (w1->a_no_check(k-1) >= 0)) || |
286 | ((w1->a_no_check(k) >= 0) && (w1->a_no_check(k-1) < 0))) |
287 | break; |
288 | if (c == 40) k -= 40; |
289 | for (; k < w1->num_samples(); k++,i++) |
290 | w->a_no_check(i) = w1->a_no_check(k); |
291 | } |
292 | w->resize(i); |
293 | |
294 | witem = u->create_relation("Wave")->append(); |
295 | witem->set_val("wave",est_val(w)); |
296 | |
297 | return utt; |
298 | } |
299 | |
300 | static LISP clunits_windowed_wave(LISP utt) |
301 | { |
302 | // windowed join, no prosodic modification |
303 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); |
304 | EST_Wave *w = new EST_Wave; |
305 | EST_Wave *w1 = 0; |
306 | EST_Track *t1 = 0; |
307 | EST_Item *witem = 0; |
308 | EST_Item *s; |
309 | int size,i,k,wi,samp_idx, l_samp_idx; |
310 | int width, lwidth; |
311 | EST_Wave *www=0; |
312 | |
313 | for (size=0,s=u->relation("Unit")->head(); s != 0; s = s->next()) |
314 | size += wave(s->f("sig"))->num_samples(); |
315 | |
316 | if (u->relation("Unit")->head()) |
317 | { // This will copy the necessary wave features across |
318 | s = u->relation("Unit")->head(); |
319 | www = wave(s->f("sig")); |
320 | *w = *www; |
321 | } |
322 | w->resize(size); // its maximum size |
323 | wi=0; |
324 | lwidth = width = 0; |
325 | for (s=u->relation("Unit")->head(); s; s=s->next()) |
326 | { |
327 | w1 = wave(s->f("sig")); |
328 | t1 = track(s->f("coefs")); |
329 | |
330 | l_samp_idx = 0; |
331 | for (i=0; i < t1->num_frames()-1; i++) |
332 | { |
333 | samp_idx = (int)(t1->t(i)*w->sample_rate()); |
334 | width = samp_idx - l_samp_idx; |
335 | if (clunits_smooth_frames && (i==0) && (lwidth != 0)) |
336 | width = (width+lwidth)/2; // not sure if this is worth it |
337 | wi += width; |
338 | for (k=-width; ((k<width)&&((samp_idx+k)<w1->num_samples())) ;k++) |
339 | w->a(wi+k) += |
340 | (int)(0.5*(1+cos((PI3.14159265358979323846/(double)(width))*(double)k))* |
341 | w1->a(samp_idx+k)); |
342 | l_samp_idx = samp_idx; |
343 | } |
344 | lwidth = width; |
345 | } |
346 | w->resize(wi); |
347 | |
348 | witem = u->create_relation("Wave")->append(); |
349 | witem->set_val("wave",est_val(w)); |
350 | |
351 | return utt; |
352 | } |
353 | |
354 | static LISP clunits_smoothedjoin_wave(LISP utt) |
355 | { |
356 | // Actually not very smoothed yet, just joined |
357 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); |
358 | EST_Wave *w = new EST_Wave; |
359 | EST_Wave *w1 = 0; |
360 | EST_Track *t1 = 0; |
361 | EST_Item *witem = 0; |
362 | EST_Item *s; |
363 | int size,i,wi; |
364 | int samp_end, samp_start; |
365 | EST_Wave *www=0; |
366 | |
367 | for (size=0,s=u->relation("Unit")->head(); s != 0; s = s->next()) |
368 | { |
369 | samp_end = s->I("samp_end"); |
370 | samp_start = s->I("samp_start"); |
371 | size += samp_end-samp_start; |
372 | } |
373 | |
374 | if (u->relation("Unit")->head()) |
375 | { // This will copy the necessary wave features across |
376 | s = u->relation("Unit")->head(); |
377 | www = wave(s->f("sig")); |
378 | *w = *www; |
379 | } |
380 | w->resize(size); // its maximum size |
381 | wi=0; |
382 | for (s=u->relation("Unit")->head(); s; s=s->next()) |
383 | { |
384 | samp_end = s->I("samp_end"); |
385 | samp_start = s->I("samp_start"); |
386 | w1 = wave(s->f("sig")); |
387 | /* printf("%s %s %f %f %d %d\n", |
388 | (const char *)s->S("name"), |
389 | (const char *)s->S("fileid"), |
390 | (float)samp_start/(float)w->sample_rate(), |
391 | (float)samp_end/(float)w->sample_rate(), |
392 | w1->num_samples(), |
393 | samp_end); */ |
394 | t1 = track(s->f("coefs")); |
395 | for (i=samp_start; i<samp_end; i++,wi++) |
396 | w->a_no_check(wi) = w1->a_no_check(i); |
397 | /* printf("%d %f\n",wi,(float)wi/(float)w->sample_rate()); */ |
398 | } |
399 | w->resize(wi); |
400 | |
401 | witem = u->create_relation("Wave")->append(); |
402 | witem->set_val("wave",est_val(w)); |
403 | |
404 | return utt; |
405 | } |
406 | |
407 | static void setup_clunits_params() |
408 | { |
409 | // Set up params |
410 | clunits_params = siod_get_lval("clunits_params", |
411 | "CLUNITS: no parameters set for module"); |
412 | optimal_coupling = get_param_int("optimal_coupling",clunits_params,0); |
413 | different_prev_pen = get_param_float("different_prev_pen",clunits_params,1000.0); |
414 | non_consecutive_pen = get_param_float("non_consectutive_pen",clunits_params,100.0); |
415 | extend_selections = get_param_int("extend_selections",clunits_params,0); |
416 | continuity_weight = get_param_float("continuity_weight",clunits_params,1); |
417 | f0_join_weight = get_param_float("f0_join_weight",clunits_params,0.0); |
418 | clunits_debug = get_param_int("clunits_debug",clunits_params,0); |
419 | clunits_log_scores = get_param_int("log_scores",clunits_params,0); |
420 | clunits_smooth_frames = get_param_int("smooth_frames",clunits_params,0); |
421 | clunit_name_feat = get_param_str("clunit_name_feat",clunits_params,"name"); |
422 | selection_trees = |
423 | siod_get_lval("clunits_selection_trees", |
424 | "CLUNITS: clunits_selection_trees unbound"); |
425 | } |
426 | |
427 | static EST_VTCandidate *TS_candlist(EST_Item *s,EST_Features &f) |
428 | { |
429 | // Return a list of candidate units for target s |
430 | // Use the appropriate CART to select a small group of candidates |
431 | EST_VTCandidate *all_cands = 0; |
432 | EST_VTCandidate *c, *gt; |
433 | LISP tree,group,l,pd,cc,ls; |
434 | EST_String name; |
435 | EST_String lookingfor; |
436 | CLunit *u; |
437 | int bbb,ccc; |
438 | float cluster_mean; |
439 | (void)f; |
440 | bbb=ccc=0; |
Value stored to 'bbb' is never read | |
441 | |
442 | lookingfor = s->S("clunit_name"); |
443 | ls = siod(s); |
444 | |
445 | cc = siod_get_lval("clunits_cand_hooks",NULL__null); |
446 | if (cc) |
447 | pd = apply_hooks(siod_get_lval("clunits_cand_hooks",NULL__null), |
448 | ls); |
449 | else |
450 | { |
451 | tree = car(cdr(siod_assoc_str(lookingfor,selection_trees))); |
452 | pd = wagon_pd(s,tree); |
453 | } |
454 | if (pd == NIL((struct obj *) 0)) |
455 | { |
456 | cerr << "CLUNITS: no predicted class for " << |
457 | s->S("clunit_name") << endl; |
458 | festival_error()(errjmp_ok ? longjmp(*est_errjmp,1) : festival_tidy_up(),exit (-1)); |
459 | } |
460 | group = car(pd); |
461 | cluster_mean = get_c_float(car(cdr(pd))); |
462 | |
463 | for (bbb=0,l=group; l != NIL((struct obj *) 0); l=cdr(l),bbb++) |
464 | { |
465 | c = new EST_VTCandidate; |
466 | name = s->S("clunit_name")+"_"+get_c_string(car(car(l))); |
467 | u = cldb->get_unit(name); |
468 | if (u == 0) |
469 | { |
470 | cerr << "CLUNITS: failed to find unit " << name << |
471 | " in index" << endl; |
472 | festival_error()(errjmp_ok ? longjmp(*est_errjmp,1) : festival_tidy_up(),exit (-1)); |
473 | } |
474 | cldb->load_join_coefs(u); |
475 | c->name = est_val(u); |
476 | c->s = s; |
477 | // Mean distance from others in cluster (could be precalculated) |
478 | c->score = get_c_float(car(cdr(car(l))))-cluster_mean; |
479 | c->score *= c->score; |
480 | // Maybe this should be divided by overall mean of set |
481 | // to normalise this figure (?) |
482 | |
483 | c->next = all_cands; |
484 | all_cands = c; |
485 | } |
486 | |
487 | if (extend_selections) |
488 | { |
489 | // An experiment, for all candidates of the previous |
490 | // item whose following is of this phone type, include |
491 | // them as a candidate |
492 | EST_Item *ppp = s->prev(); |
493 | if (ppp) |
494 | { |
495 | EST_VTCandidate *lc = vtcand(ppp->f("unit_cands")); |
496 | for (ccc=0 ; lc && (ccc < extend_selections); lc = lc->next) |
497 | { |
498 | CLunit *unit = clunit(lc->name); |
499 | CLunit *next_unit; |
500 | |
501 | if (unit->next_unit) |
502 | next_unit = unit->next_unit; |
503 | else |
504 | continue; |
505 | EST_String ss; |
506 | ss = next_unit->name.before("_"); |
507 | if (ss.matches(".*_.*_.*")) |
508 | { |
509 | ss += "_"; |
510 | ss += next_unit->name.after("_").before("_"); |
511 | } |
512 | /* printf("%s %s\n",(const char *)ss, (const char *)lookingfor); */ |
513 | for (gt=all_cands; gt; gt=gt->next) |
514 | if (clunit(gt->name)->name == next_unit->name) |
515 | break; /* got this one already */ |
516 | if ((ss == lookingfor) && (gt == 0)) |
517 | { // its the right type so add it |
518 | c = new EST_VTCandidate; |
519 | c->name = est_val(next_unit); |
520 | cldb->load_join_coefs(next_unit); |
521 | c->s = s; |
522 | c->score = 0; |
523 | c->next = all_cands; |
524 | all_cands = c; |
525 | bbb++; |
526 | ccc++; |
527 | } |
528 | } |
529 | } |
530 | |
531 | s->set_val("unit_cands",est_val(all_cands)); |
532 | } |
533 | if (clunits_debug) |
534 | printf("cands %d (extends %d) %s\n",bbb,ccc,(const char *)lookingfor); |
535 | return all_cands; |
536 | } |
537 | |
538 | static EST_VTPath *TS_npath(EST_VTPath *p,EST_VTCandidate *c,EST_Features &f) |
539 | { |
540 | // Combine candidate c with previous path updating score |
541 | // with join cost |
542 | float cost; |
543 | EST_VTPath *np = new EST_VTPath; |
544 | CLunit *u0, *u1; |
545 | float u0_move=0.0, u1_move=0.0; |
546 | (void)f; |
547 | |
548 | np->c = c; |
549 | np->from = p; |
550 | if ((p == 0) || (p->c == 0)) |
551 | cost = 0; // nothing previous to join to |
552 | else |
553 | { |
554 | u0 = clunit(p->c->name); |
555 | u1 = clunit(c->name); |
556 | // printf("u0 %s u1 %s\n", |
557 | // (const char *)u0->name, |
558 | // (const char *)u1->name); |
559 | if (optimal_coupling) |
560 | cost = optimal_couple(u0,u1,u0_move,u1_move, |
561 | optimal_coupling, |
562 | different_prev_pen, |
563 | non_consecutive_pen); |
564 | else // naive measure |
565 | cost = naive_join_cost(u0,u1,c->s,u0_move,u1_move); |
566 | // When optimal_coupling == 2 the moves will be 0, just the scores |
567 | // are relevant |
568 | if (optimal_coupling == 1) |
569 | { |
570 | np->f.set(static_unit_prev_move,u0_move); // new (prev) end |
571 | np->f.set(static_unit_this_move,u1_move); // new start |
572 | } |
573 | } |
574 | // printf("cost %f continuity_weight %f\n", cost, continuity_weight); |
575 | cost *= continuity_weight; |
576 | np->state = c->pos; // "state" is candidate number |
577 | if (clunits_log_scores && (cost != 0)) |
578 | cost = log(cost); |
579 | |
580 | np->f.set(static_jscore,cost); |
581 | np->f.set(static_tscore,c->score); |
582 | if (p==0) |
583 | np->score = (c->score+cost); |
584 | else |
585 | np->score = (c->score+cost) + p->score; |
586 | np->f.set(static_cscore,np->score); |
587 | |
588 | if (clunits_debug > 1) |
589 | printf("joining cost %f\n",np->score); |
590 | return np; |
591 | } |
592 | |
593 | static float optimal_couple(CLunit *u0, |
594 | CLunit *u1, |
595 | float &u0_move, |
596 | float &u1_move, |
597 | int type, |
598 | float different_prev_pen, |
599 | float non_consecutive_pen |
600 | ) |
601 | { |
602 | // Find combination cost of u0 to u1, checking for best |
603 | // frame up to n frames back in u0 and u1. |
604 | // Note this checks the u0 with u1's predecessor, which may or may not |
605 | // be of the same type |
606 | // There is some optimisation here in unit coeff access |
607 | EST_Track *u0_cep, *u1_p_cep; |
608 | float dist, best_val; |
609 | int i,eee; |
610 | int u0_st, u0_end; |
611 | int u1_p_st, u1_p_end; |
612 | int best_u0, best_u1; |
613 | CLunit *u1_p; |
614 | float f; |
615 | |
616 | u1_p = u1->prev_unit; |
617 | |
618 | u0_move = u0->end; |
619 | if (u1_p == 0) |
620 | u1_move = 0; |
621 | else |
622 | u1_move = u1_p->end; |
623 | |
624 | if (u1_p == u0) // they are consecutive |
625 | return 0.0; |
626 | if (u1_p == 0) // hacky condition, when there is no previous we'll |
627 | return 0.0; // assume a good join (should be silence there) |
628 | |
629 | if (u1_p->join_coeffs == 0) |
630 | cldb->load_join_coefs(u1_p); |
631 | // Get indexes into full cep for utterances rather than sub ceps |
632 | u0_cep = u0->join_coeffs; |
633 | u1_p_cep = u1_p->join_coeffs; |
634 | |
635 | u0_end = u0_cep->num_frames(); |
636 | u1_p_end = u1_p_cep->num_frames(); |
637 | |
638 | if (!streq(u1_p->base_name,u0->base_name)(strcmp(u1_p->base_name,u0->base_name)==0)) |
639 | { /* prev(u1) is a different phone from u0 so don't slide */ |
640 | f = different_prev_pen; |
641 | u0_st = u0_cep->num_frames()-1; |
642 | u1_p_st = u1_p_cep->num_frames()-1; |
643 | } |
644 | else if (type == 2) |
645 | { /* we'll only check the edge for the join */ |
646 | u0_st = u0_cep->num_frames()-1; |
647 | u1_p_st = u1_p_cep->num_frames()-1; |
648 | f = 1; |
649 | } |
650 | else |
651 | { |
652 | u0_st = (int)(u0_cep->num_frames() * 0.33); |
653 | u1_p_st = (int)(u1_p_cep->num_frames() * 0.33); |
654 | f = 1; |
655 | } |
656 | |
657 | best_u0=u0_end; |
658 | best_u1=u1_p_end; |
659 | best_val = HUGE_VAL(__builtin_huge_val()); |
660 | |
661 | // Here we look for the best join without sliding the windows |
662 | if ((u0_end-u0_st) < (u1_p_end-u1_p_st)) |
663 | eee = u0_end-u0_st; |
664 | else |
665 | eee = u1_p_end-u1_p_st; |
666 | for (i=0; i < eee; i++) |
667 | { |
668 | dist = frame_distance(*u0_cep,i+u0_st, |
669 | *u1_p_cep,i+u1_p_st, |
670 | cldb->cweights, |
671 | f0_join_weight); |
672 | if (dist < best_val) |
673 | { |
674 | best_val = dist; |
675 | best_u0 = i+u0_st; |
676 | best_u1 = i+u1_p_st; |
677 | } |
678 | } |
679 | #if 0 |
680 | // This tries *all* possible matches in the pair, its slow |
681 | // and has a tendency to shorten things more than you'd like |
682 | // so we just use the more simple test above. |
683 | int j; |
684 | for (i=u0_st; i < u0_end; i++) |
685 | { |
686 | for (j=u1_p_st; j < u1_p_end; j++) |
687 | { |
688 | dist = frame_distance(*u0_cep,i, |
689 | *u1_p_cep,j, |
690 | cldb->cweights); |
691 | if (dist < best_val) |
692 | { |
693 | best_val = dist; |
694 | best_u0 = i; |
695 | best_u1 = j; |
696 | } |
697 | } |
698 | } |
699 | #endif |
700 | |
701 | if (type == 1) |
702 | { |
703 | u0_move = u0_cep->t(best_u0); |
704 | u1_move = u1_p_cep->t(best_u1); |
705 | } |
706 | |
707 | return non_consecutive_pen+(best_val*f); |
708 | } |
709 | |
710 | static float naive_join_cost(CLunit *unit0, CLunit *unit1, |
711 | EST_Item *s, |
712 | float &u0_move, |
713 | float &u1_move) |
714 | { |
715 | // A naive join cost, because I haven't ported the info yet |
716 | |
717 | u0_move = unit0->end; |
718 | u1_move = unit1->start; |
719 | |
720 | if (unit0 == unit1) |
721 | return 0; |
722 | else if (unit1->prev_unit->name == unit0->name) |
723 | return 0; |
724 | else if (ph_is_silence(s->name())) |
725 | return 0; |
726 | else if (ph_is_stop(s->name())) |
727 | return 0.2; |
728 | else if (ph_is_fricative(s->name())) |
729 | return 0.3; |
730 | else |
731 | return 1.0; |
732 | } |
733 | |
734 | static LISP cldb_load_all_coeffs(LISP filelist) |
735 | { |
736 | LISP f; |
737 | |
738 | cldb = check_cldb(); |
739 | for (f=filelist; f; f=cdr(f)) |
740 | { |
741 | cldb->get_file_coefs_sig(get_c_string(car(f))); |
742 | cldb->get_file_join_coefs(get_c_string(car(f))); |
743 | } |
744 | |
745 | return NIL((struct obj *) 0); |
746 | } |
747 | |
748 | void festival_clunits_init(void) |
749 | { |
750 | // Initialization for clunits selection |
751 | |
752 | proclaim_module("clunits", |
753 | "Copyright (C) University of Edinburgh and CMU 1997-2010\n"); |
754 | |
755 | gc_protect(&clunits_params); |
756 | gc_protect(&selection_trees); |
757 | |
758 | festival_def_utt_module("Clunits_Select",clunits_select, |
759 | "(Clunits_Select UTT)\n\ |
760 | Select units from current databases using cluster selection method."); |
761 | |
762 | festival_def_utt_module("Clunits_Get_Units",clunits_get_units, |
763 | "(Clunits_Get_Units UTT)\n\ |
764 | Construct Unit relation from the selected units in Segment and extract\n\ |
765 | their parameters from the clunit db."); |
766 | |
767 | festival_def_utt_module("Clunits_Simple_Wave",clunits_simple_wave, |
768 | "(Clunits_Simple_Wave UTT)\n\ |
769 | Naively concatenate signals together into a single wave (for debugging)."); |
770 | |
771 | festival_def_utt_module("Clunits_Windowed_Wave",clunits_windowed_wave, |
772 | "(Clunits_Windowed_Wave UTT)\n\ |
773 | Use hamming window over edges of units to join them, no prosodic \n\ |
774 | modification though."); |
775 | |
776 | festival_def_utt_module("Clunits_SmoothedJoin_Wave",clunits_smoothedjoin_wave, |
777 | "(Clunits_SmoothedJoin_Wave UTT)\n\ |
778 | smoothed join."); |
779 | |
780 | init_subr_1("clunits:load_db",cl_load_db, |
781 | "(clunits:load_db PARAMS)\n\ |
782 | Load index file for cluster database and set up params, and select it."); |
783 | |
784 | init_subr_1("clunits:select",cldb_select, |
785 | "(clunits:select NAME)\n\ |
786 | Select a previously loaded cluster database."); |
787 | |
788 | init_subr_1("clunits:load_all_coefs",cldb_load_all_coeffs, |
789 | "(clunits:load_all_coefs FILEIDLIST)\n\ |
790 | Load in coefficients, signal and join coefficients for each named\n\ |
791 | fileid. This is can be called at startup to to reduce the load time\n\ |
792 | during synthesis (though may make the image large)."); |
793 | |
794 | init_subr_0("clunits:list",cldb_list, |
795 | "(clunits:list)\n\ |
796 | List names of currently loaded cluster databases."); |
797 | |
798 | init_subr_2("acost:build_disttabs",make_unit_distance_tables, |
799 | "(acost:build_disttabs UTTTYPES PARAMS)\n\ |
800 | Built matrices of distances between each ling_item in each each list\n\ |
801 | of ling_items in uttypes. Uses acoustic weights in PARAMS and save\n\ |
802 | the result as a matrix for later use."); |
803 | |
804 | init_subr_2("acost:utt.load_coeffs",acost_utt_load_coeffs, |
805 | "(acost:utt.load_coeffs UTT PARAMS)\n\ |
806 | Load in the acoustic coefficients into UTT and set the Acoustic_Coeffs\n\ |
807 | feature for each segment in UTT."); |
808 | |
809 | init_subr_3("acost:file_difference",ac_distance_tracks, |
810 | "(acost:file_difference FILENAME1 FILENAME2 PARAMS)\n\ |
811 | Load in the two named tracks and find the acoustic difference over all\n\ |
812 | based on the weights in PARAMS."); |
813 | |
814 | init_subr_2("cl_mapping", l_cl_mapping, |
815 | "(cl_mapping UTT PARAMS)\n\ |
816 | Impose prosody upto some percentage, and not absolutely."); |
817 | |
818 | } |