| File: | modules/clunits/clunits.cc | 
| Location: | line 394, column 2 | 
| Description: | Value stored to 't1' is never read | 
| 1 | /*************************************************************************/ | 
| 2 | /* */ | 
| 3 | /* Carnegie Mellon University and */ | 
| 4 | /* Centre for Speech Technology Research */ | 
| 5 | /* University of Edinburgh, UK */ | 
| 6 | /* Copyright (c) 1998-2001 */ | 
| 7 | /* All Rights Reserved. */ | 
| 8 | /* */ | 
| 9 | /* Permission is hereby granted, free of charge, to use and distribute */ | 
| 10 | /* this software and its documentation without restriction, including */ | 
| 11 | /* without limitation the rights to use, copy, modify, merge, publish, */ | 
| 12 | /* distribute, sublicense, and/or sell copies of this work, and to */ | 
| 13 | /* permit persons to whom this work is furnished to do so, subject to */ | 
| 14 | /* the following conditions: */ | 
| 15 | /* 1. The code must retain the above copyright notice, this list of */ | 
| 16 | /* conditions and the following disclaimer. */ | 
| 17 | /* 2. Any modifications must be clearly marked as such. */ | 
| 18 | /* 3. Original authors' names are not deleted. */ | 
| 19 | /* 4. The authors' names are not used to endorse or promote products */ | 
| 20 | /* derived from this software without specific prior written */ | 
| 21 | /* permission. */ | 
| 22 | /* */ | 
| 23 | /* THE UNIVERSITY OF EDINBURGH, CARNEGIE MELLON UNIVERSITY AND THE */ | 
| 24 | /* CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH REGARD TO */ | 
| 25 | /* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY */ | 
| 26 | /* AND FITNESS, IN NO EVENT SHALL THE UNIVERSITY OF EDINBURGH, CARNEGIE */ | 
| 27 | /* MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, */ | 
| 28 | /* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER */ | 
| 29 | /* RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION */ | 
| 30 | /* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF */ | 
| 31 | /* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ | 
| 32 | /* */ | 
| 33 | /*************************************************************************/ | 
| 34 | /* Author : Alan W Black */ | 
| 35 | /* Date : April 1998 */ | 
| 36 | /*-----------------------------------------------------------------------*/ | 
| 37 | /* */ | 
| 38 | /* Yet another unit selection method. */ | 
| 39 | /* */ | 
| 40 | /* Using an acoustic measure find the distance between all units in the */ | 
| 41 | /* db. Try to minimise the mean difference between units in a cluster */ | 
| 42 | /* using CART technology, based on features like phonetic and prosodic */ | 
| 43 | /* context. This gives a bunch of CARTs for each unit type in the db */ | 
| 44 | /* which are acoustically close. Use these as candidates and optimise */ | 
| 45 | /* a path through them minimising join using a viterbi search. */ | 
| 46 | /* */ | 
| 47 | /* Advantages: */ | 
| 48 | /* requires little or no measurements at selection time */ | 
| 49 | /* allows for clear method of pruning */ | 
| 50 | /* no weights need to be generated (well, except where they do) */ | 
| 51 | /* will optimise appropriately with varying numbers of example units */ | 
| 52 | /* */ | 
| 53 | /* Disadvantages: */ | 
| 54 | /* Units can't cross between clusters */ | 
| 55 | /* */ | 
| 56 | /* Implementation of Black, A. and Taylor, P. (1997). Automatically */ | 
| 57 | /* clustering similar units for unit selection in speech synthesis */ | 
| 58 | /* Proceedings of Eurospeech 97, vol2 pp 601-604, Rhodes, Greece. */ | 
| 59 | /* */ | 
| 60 | /* postscript: http://www.cs.cmu.edu/~awb/papers/ES97units.ps */ | 
| 61 | /* http://www.cs.cmu.edu/~awb/papers/ES97units/ES97units.html */ | 
| 62 | /* */ | 
| 63 | /* Comments: */ | 
| 64 | /* */ | 
| 65 | /* This is a new implementation using the newer unit selection/signal */ | 
| 66 | /* processing architecture in festival */ | 
| 67 | /* */ | 
| 68 | /* This is still in development but is becoming more stable. It is robust */ | 
| 69 | /* for many cases, though a lot depends on the db and parameters */ | 
| 70 | /* you use */ | 
| 71 | /* */ | 
| 72 | /* This had significant new work (and bug fixes) done on it when awb */ | 
| 73 | /* moved to CMU */ | 
| 74 | /* */ | 
| 75 | /*=======================================================================*/ | 
| 76 | #include <cstdlib> | 
| 77 | #include "EST_math.h" | 
| 78 | #include "festival.h" | 
| 79 | #include "clunits.h" | 
| 80 | |
| 81 | using namespace std; | 
| 82 | |
| 83 | static EST_String static_unit_prev_move = "unit_prev_move"; | 
| 84 | static EST_String static_unit_this_move = "unit_this_move"; | 
| 85 | static EST_String static_jscore = "local_join_cost"; | 
| 86 | static EST_String static_tscore = "local_target_cost"; | 
| 87 | static EST_String static_cscore = "cummulative_unit_score"; | 
| 88 | |
| 89 | static void setup_clunits_params(); | 
| 90 | static EST_VTCandidate *TS_candlist(EST_Item *s,EST_Features &f); | 
| 91 | static EST_VTPath *TS_npath(EST_VTPath *p,EST_VTCandidate *c,EST_Features &f); | 
| 92 | static float naive_join_cost(CLunit *unit0, CLunit *unit1, | 
| 93 | EST_Item *s, | 
| 94 | float &u0_move, | 
| 95 | float &u1_move); | 
| 96 | static float optimal_couple(CLunit *u0, | 
| 97 | CLunit *u1, | 
| 98 | float &u0_move, | 
| 99 | float &u1_move, | 
| 100 | int type, | 
| 101 | float different_prev_pen, | 
| 102 | float non_consecutive_pen); | 
| 103 | static void cl_parse_diphone_times(EST_Relation &diphone_stream, | 
| 104 | EST_Relation &source_lab); | 
| 105 | |
| 106 | VAL_REGISTER_CLASS_NODEL(vtcand,EST_VTCandidate)val_type val_type_vtcand="vtcand"; class EST_VTCandidate *vtcand (const EST_Val &v) { if (v.type() == val_type_vtcand) return (class EST_VTCandidate *)v.internal_ptr(); else (EST_error_where = __null), (*EST_error_func)("val not of type val_type_""vtcand" ); return __null; } static void val_delete_vtcand(void *v) { ( void)v; } EST_Val est_val(const class EST_VTCandidate *v) { return EST_Val(val_type_vtcand, (void *)v,val_delete_vtcand); };  | 
| 107 | VAL_REGISTER_CLASS_NODEL(clunit,CLunit)val_type val_type_clunit="clunit"; class CLunit *clunit(const EST_Val &v) { if (v.type() == val_type_clunit) return (class CLunit *)v.internal_ptr(); else (EST_error_where = __null), ( *EST_error_func)("val not of type val_type_""clunit"); return __null; } static void val_delete_clunit(void *v) { (void)v; } EST_Val est_val(const class CLunit *v) { return EST_Val(val_type_clunit , (void *)v,val_delete_clunit); };  | 
| 108 | |
| 109 | LISP selection_trees = NIL((struct obj *) 0); | 
| 110 | LISP clunits_params = NIL((struct obj *) 0); | 
| 111 | static int optimal_coupling = 0; | 
| 112 | static int extend_selections = 0; | 
| 113 | static int clunits_debug = 0; | 
| 114 | static int clunits_log_scores = 0; | 
| 115 | static int clunits_smooth_frames = 0; | 
| 116 | float continuity_weight = 1; | 
| 117 | float f0_join_weight = 0.0; | 
| 118 | float different_prev_pen = 1000.0; | 
| 119 | float non_consecutive_pen = 100.0; | 
| 120 | static EST_String clunit_name_feat = "name"; | 
| 121 | |
| 122 | static CLDB *cldb; | 
| 123 | |
| 124 | static LISP clunits_select(LISP utt) | 
| 125 | { | 
| 126 | // Select units from db using CARTs to index into clustered unit groups | 
| 127 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); | 
| 128 | EST_Item *s, *f; | 
| 129 | |
| 130 | cldb = check_cldb(); // make sure there is one loaded | 
| 131 | setup_clunits_params(); | 
| 132 | |
| 133 | f = u->relation("Segment")->head(); | 
| 134 | for (s=f; s; s=s->next()) | 
| 135 | s->set_val("clunit_name",ffeature(s,clunit_name_feat)); | 
| 136 | |
| 137 | if (f) | 
| 138 | { | 
| 139 | EST_Viterbi_Decoder v(TS_candlist,TS_npath,-1); | 
| 140 | v.set_big_is_good(FALSE(1==0)); // big is bad | 
| 141 | |
| 142 | v.initialise(u->relation("Segment")); | 
| 143 | v.search(); | 
| 144 | if (!v.result("unit_id")) | 
| 145 | { | 
| 146 | cerr << "CLUNIT: failed to find path\n"; | 
| 147 | return utt; | 
| 148 | } | 
| 149 | v.copy_feature(static_unit_this_move); | 
| 150 | v.copy_feature(static_unit_prev_move); | 
| 151 | v.copy_feature(static_jscore); | 
| 152 | v.copy_feature(static_tscore); | 
| 153 | v.copy_feature(static_cscore); | 
| 154 | } | 
| 155 | |
| 156 | return utt; | 
| 157 | } | 
| 158 | |
| 159 | static LISP clunits_get_units(LISP utt) | 
| 160 | { | 
| 161 | // Create unit stream and loading params | 
| 162 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); | 
| 163 | EST_Relation *units,*ss; | 
| 164 | EST_Item *s; | 
| 165 | |
| 166 | cldb = check_cldb(); // make sure there is one loaded | 
| 167 | |
| 168 | units = u->create_relation("Unit"); | 
| 169 | for (s=u->relation("Segment")->head(); s != 0; s=s->next()) | 
| 170 | { | 
| 171 | EST_Item *unit = units->append(); | 
| 172 | CLunit *db_unit = clunit(s->f("unit_id")); | 
| 173 | float st,e; | 
| 174 | unit->set_name(db_unit->name); | 
| 175 | unit->set("fileid",db_unit->fileid); | 
| 176 | // These should be modified from the optimal coupling | 
| 177 | if ((s->prev()) && (s->f_present("unit_this_move"))) | 
| 178 | st = s->F("unit_this_move"); | 
| 179 | else | 
| 180 | st = db_unit->start; | 
| 181 | if (s->next() && (s->next()->f_present("unit_prev_move"))) | 
| 182 | e = s->next()->F("unit_prev_move"); | 
| 183 | else | 
| 184 | e = db_unit->end; | 
| 185 | if ((e-st) < 0.011) | 
| 186 | e = st + 0.011; | 
| 187 | unit->set("start",st); | 
| 188 | unit->set("middle",db_unit->start); | 
| 189 | unit->set("end",e); | 
| 190 | unit->set("unit_start",st); | 
| 191 | unit->set("unit_middle",db_unit->start); | 
| 192 | unit->set("unit_end",e); | 
| 193 | unit->set("seg_start",db_unit->start); | 
| 194 | unit->set("seg_end",db_unit->end); | 
| 195 | cldb->load_coefs_sig(unit); | 
| 196 | if (clunits_debug) | 
| 197 | printf("unit: %s fileid %s start %f end %f\n", | 
| 198 | (const char *)db_unit->name, | 
| 199 | (const char *)db_unit->fileid, | 
| 200 | st,e); | 
| 201 | } | 
| 202 | |
| 203 | // Make it look as much like the diphones as possible for | 
| 204 | // the rest of the code | 
| 205 | ss = u->create_relation("SourceSegments"); | 
| 206 | for (s = u->relation("Segment")->head(); s != 0 ; s = s->next()) | 
| 207 | { | 
| 208 | EST_Item *d = ss->append(); | 
| 209 | d->set_name(ffeature(s,"clunit_name")); | 
| 210 | } | 
| 211 | |
| 212 | cl_parse_diphone_times(*units,*ss); | 
| 213 | |
| 214 | return utt; | 
| 215 | } | 
| 216 | |
| 217 | static void cl_parse_diphone_times(EST_Relation &diphone_stream, | 
| 218 | EST_Relation &source_lab) | 
| 219 | { | 
| 220 | EST_Item *s, *u; | 
| 221 | EST_Track *pm; | 
| 222 | int e_frame, m_frame = 0; | 
| 223 | float dur_1 = 0.0, dur_2 = 0.0, p_time; | 
| 224 | float t_time = 0.0, end; | 
| 225 | p_time = 0.0; | 
| 226 | |
| 227 | for (s = source_lab.head(), u = diphone_stream.head(); u; u = u->next(), | 
| 228 | s = s->next()) | 
| 229 | { | 
| 230 | pm = track(u->f("coefs")); | 
| 231 | if (pm == 0) | 
| 232 | { | 
| 233 | cerr << "CLUNIT: couldn't get pitchmarks for " << u->name() << endl; | 
| 234 | 	    festival_error()(errjmp_ok ? longjmp(*est_errjmp,1) : festival_tidy_up(),exit (-1));  | 
| 235 | } | 
| 236 | |
| 237 | e_frame = pm->num_frames() - 1; | 
| 238 | m_frame = u->I("middle_frame"); | 
| 239 | |
| 240 | dur_1 = pm->t(m_frame); | 
| 241 | dur_2 = pm->t(e_frame) - dur_1; | 
| 242 | |
| 243 | s->set("end", (dur_1 + p_time)); | 
| 244 | p_time = s->F("end") + dur_2; | 
| 245 | |
| 246 | end = dur_1 + dur_2 + t_time; | 
| 247 | t_time = end; | 
| 248 | u->set("end", t_time); | 
| 249 | } | 
| 250 | if (s) | 
| 251 | s->set("end", (dur_2 + p_time)); | 
| 252 | } | 
| 253 | |
| 254 | static LISP clunits_simple_wave(LISP utt) | 
| 255 | { | 
| 256 | // Naive joining of waveforms | 
| 257 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); | 
| 258 | EST_Wave *w = new EST_Wave; | 
| 259 | EST_Wave *w1 = 0; | 
| 260 | EST_Item *witem = 0; | 
| 261 | EST_Item *s; | 
| 262 | int size,i,k,c; | 
| 263 | |
| 264 | for (size=0,s=u->relation("Unit")->head(); s != 0; s = s->next()) | 
| 265 | size += wave(s->f("sig"))->num_samples(); | 
| 266 | |
| 267 | if (u->relation("Unit")->head()) | 
| 268 | { // This will copy the necessary wave features across | 
| 269 | s = u->relation("Unit")->head(); | 
| 270 | *w = *(wave(s->f("sig"))); | 
| 271 | } | 
| 272 | i = w->num_samples(); | 
| 273 | w->resize(size); // its maximum size | 
| 274 | for (s=u->relation("Unit")->head()->next(); s; s=s->next()) | 
| 275 | { | 
| 276 | w1 = wave(s->f("sig")); | 
| 277 | // Find last zero crossing | 
| 278 | for (c=0; ((i > 0) && (c < 40)); c++,i--) | 
| 279 | if (((w->a_no_check(i) < 0) && (w->a_no_check(i-1) >= 0)) || | 
| 280 | ((w->a_no_check(i) >= 0) && (w->a_no_check(i-1) < 0))) | 
| 281 | break; | 
| 282 | if (c == 40) i += 40; | 
| 283 | // Find next zero crossing | 
| 284 | for (c=0,k=1; ((k < w1->num_samples()) && (c < 40)); k++,i++) | 
| 285 | if (((w1->a_no_check(k) < 0) && (w1->a_no_check(k-1) >= 0)) || | 
| 286 | ((w1->a_no_check(k) >= 0) && (w1->a_no_check(k-1) < 0))) | 
| 287 | break; | 
| 288 | if (c == 40) k -= 40; | 
| 289 | for (; k < w1->num_samples(); k++,i++) | 
| 290 | w->a_no_check(i) = w1->a_no_check(k); | 
| 291 | } | 
| 292 | w->resize(i); | 
| 293 | |
| 294 | witem = u->create_relation("Wave")->append(); | 
| 295 | witem->set_val("wave",est_val(w)); | 
| 296 | |
| 297 | return utt; | 
| 298 | } | 
| 299 | |
| 300 | static LISP clunits_windowed_wave(LISP utt) | 
| 301 | { | 
| 302 | // windowed join, no prosodic modification | 
| 303 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); | 
| 304 | EST_Wave *w = new EST_Wave; | 
| 305 | EST_Wave *w1 = 0; | 
| 306 | EST_Track *t1 = 0; | 
| 307 | EST_Item *witem = 0; | 
| 308 | EST_Item *s; | 
| 309 | int size,i,k,wi,samp_idx, l_samp_idx; | 
| 310 | int width, lwidth; | 
| 311 | EST_Wave *www=0; | 
| 312 | |
| 313 | for (size=0,s=u->relation("Unit")->head(); s != 0; s = s->next()) | 
| 314 | size += wave(s->f("sig"))->num_samples(); | 
| 315 | |
| 316 | if (u->relation("Unit")->head()) | 
| 317 | { // This will copy the necessary wave features across | 
| 318 | s = u->relation("Unit")->head(); | 
| 319 | www = wave(s->f("sig")); | 
| 320 | *w = *www; | 
| 321 | } | 
| 322 | w->resize(size); // its maximum size | 
| 323 | wi=0; | 
| 324 | lwidth = width = 0; | 
| 325 | for (s=u->relation("Unit")->head(); s; s=s->next()) | 
| 326 | { | 
| 327 | w1 = wave(s->f("sig")); | 
| 328 | t1 = track(s->f("coefs")); | 
| 329 | |
| 330 | l_samp_idx = 0; | 
| 331 | for (i=0; i < t1->num_frames()-1; i++) | 
| 332 | { | 
| 333 | samp_idx = (int)(t1->t(i)*w->sample_rate()); | 
| 334 | width = samp_idx - l_samp_idx; | 
| 335 | if (clunits_smooth_frames && (i==0) && (lwidth != 0)) | 
| 336 | width = (width+lwidth)/2; // not sure if this is worth it | 
| 337 | wi += width; | 
| 338 | for (k=-width; ((k<width)&&((samp_idx+k)<w1->num_samples())) ;k++) | 
| 339 | w->a(wi+k) += | 
| 340 | (int)(0.5*(1+cos((PI3.14159265358979323846/(double)(width))*(double)k))* | 
| 341 | w1->a(samp_idx+k)); | 
| 342 | l_samp_idx = samp_idx; | 
| 343 | } | 
| 344 | lwidth = width; | 
| 345 | } | 
| 346 | w->resize(wi); | 
| 347 | |
| 348 | witem = u->create_relation("Wave")->append(); | 
| 349 | witem->set_val("wave",est_val(w)); | 
| 350 | |
| 351 | return utt; | 
| 352 | } | 
| 353 | |
| 354 | static LISP clunits_smoothedjoin_wave(LISP utt) | 
| 355 | { | 
| 356 | // Actually not very smoothed yet, just joined | 
| 357 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); | 
| 358 | EST_Wave *w = new EST_Wave; | 
| 359 | EST_Wave *w1 = 0; | 
| 360 | EST_Track *t1 = 0; | 
| 361 | EST_Item *witem = 0; | 
| 362 | EST_Item *s; | 
| 363 | int size,i,wi; | 
| 364 | int samp_end, samp_start; | 
| 365 | EST_Wave *www=0; | 
| 366 | |
| 367 | for (size=0,s=u->relation("Unit")->head(); s != 0; s = s->next()) | 
| 368 | { | 
| 369 | samp_end = s->I("samp_end"); | 
| 370 | samp_start = s->I("samp_start"); | 
| 371 | size += samp_end-samp_start; | 
| 372 | } | 
| 373 | |
| 374 | if (u->relation("Unit")->head()) | 
| 375 | { // This will copy the necessary wave features across | 
| 376 | s = u->relation("Unit")->head(); | 
| 377 | www = wave(s->f("sig")); | 
| 378 | *w = *www; | 
| 379 | } | 
| 380 | w->resize(size); // its maximum size | 
| 381 | wi=0; | 
| 382 | for (s=u->relation("Unit")->head(); s; s=s->next()) | 
| 383 | { | 
| 384 | samp_end = s->I("samp_end"); | 
| 385 | samp_start = s->I("samp_start"); | 
| 386 | w1 = wave(s->f("sig")); | 
| 387 | /* printf("%s %s %f %f %d %d\n", | 
| 388 | (const char *)s->S("name"), | 
| 389 | (const char *)s->S("fileid"), | 
| 390 | (float)samp_start/(float)w->sample_rate(), | 
| 391 | (float)samp_end/(float)w->sample_rate(), | 
| 392 | w1->num_samples(), | 
| 393 | samp_end); */ | 
| 394 | t1 = track(s->f("coefs")); | 
Value stored to 't1' is never read  | |
| 395 | for (i=samp_start; i<samp_end; i++,wi++) | 
| 396 | w->a_no_check(wi) = w1->a_no_check(i); | 
| 397 | /* printf("%d %f\n",wi,(float)wi/(float)w->sample_rate()); */ | 
| 398 | } | 
| 399 | w->resize(wi); | 
| 400 | |
| 401 | witem = u->create_relation("Wave")->append(); | 
| 402 | witem->set_val("wave",est_val(w)); | 
| 403 | |
| 404 | return utt; | 
| 405 | } | 
| 406 | |
| 407 | static void setup_clunits_params() | 
| 408 | { | 
| 409 | // Set up params | 
| 410 | clunits_params = siod_get_lval("clunits_params", | 
| 411 | "CLUNITS: no parameters set for module"); | 
| 412 | optimal_coupling = get_param_int("optimal_coupling",clunits_params,0); | 
| 413 | different_prev_pen = get_param_float("different_prev_pen",clunits_params,1000.0); | 
| 414 | non_consecutive_pen = get_param_float("non_consectutive_pen",clunits_params,100.0); | 
| 415 | extend_selections = get_param_int("extend_selections",clunits_params,0); | 
| 416 | continuity_weight = get_param_float("continuity_weight",clunits_params,1); | 
| 417 | f0_join_weight = get_param_float("f0_join_weight",clunits_params,0.0); | 
| 418 | clunits_debug = get_param_int("clunits_debug",clunits_params,0); | 
| 419 | clunits_log_scores = get_param_int("log_scores",clunits_params,0); | 
| 420 | clunits_smooth_frames = get_param_int("smooth_frames",clunits_params,0); | 
| 421 | clunit_name_feat = get_param_str("clunit_name_feat",clunits_params,"name"); | 
| 422 | selection_trees = | 
| 423 | siod_get_lval("clunits_selection_trees", | 
| 424 | "CLUNITS: clunits_selection_trees unbound"); | 
| 425 | } | 
| 426 | |
| 427 | static EST_VTCandidate *TS_candlist(EST_Item *s,EST_Features &f) | 
| 428 | { | 
| 429 | // Return a list of candidate units for target s | 
| 430 | // Use the appropriate CART to select a small group of candidates | 
| 431 | EST_VTCandidate *all_cands = 0; | 
| 432 | EST_VTCandidate *c, *gt; | 
| 433 | LISP tree,group,l,pd,cc,ls; | 
| 434 | EST_String name; | 
| 435 | EST_String lookingfor; | 
| 436 | CLunit *u; | 
| 437 | int bbb,ccc; | 
| 438 | float cluster_mean; | 
| 439 | (void)f; | 
| 440 | bbb=ccc=0; | 
| 441 | |
| 442 | lookingfor = s->S("clunit_name"); | 
| 443 | ls = siod(s); | 
| 444 | |
| 445 | cc = siod_get_lval("clunits_cand_hooks",NULL__null); | 
| 446 | if (cc) | 
| 447 | pd = apply_hooks(siod_get_lval("clunits_cand_hooks",NULL__null), | 
| 448 | ls); | 
| 449 | else | 
| 450 | { | 
| 451 | tree = car(cdr(siod_assoc_str(lookingfor,selection_trees))); | 
| 452 | pd = wagon_pd(s,tree); | 
| 453 | } | 
| 454 | if (pd == NIL((struct obj *) 0)) | 
| 455 | { | 
| 456 | cerr << "CLUNITS: no predicted class for " << | 
| 457 | s->S("clunit_name") << endl; | 
| 458 | 	festival_error()(errjmp_ok ? longjmp(*est_errjmp,1) : festival_tidy_up(),exit (-1));  | 
| 459 | } | 
| 460 | group = car(pd); | 
| 461 | cluster_mean = get_c_float(car(cdr(pd))); | 
| 462 | |
| 463 | for (bbb=0,l=group; l != NIL((struct obj *) 0); l=cdr(l),bbb++) | 
| 464 | { | 
| 465 | c = new EST_VTCandidate; | 
| 466 | name = s->S("clunit_name")+"_"+get_c_string(car(car(l))); | 
| 467 | u = cldb->get_unit(name); | 
| 468 | if (u == 0) | 
| 469 | { | 
| 470 | cerr << "CLUNITS: failed to find unit " << name << | 
| 471 | " in index" << endl; | 
| 472 | 	    festival_error()(errjmp_ok ? longjmp(*est_errjmp,1) : festival_tidy_up(),exit (-1));  | 
| 473 | } | 
| 474 | cldb->load_join_coefs(u); | 
| 475 | c->name = est_val(u); | 
| 476 | c->s = s; | 
| 477 | // Mean distance from others in cluster (could be precalculated) | 
| 478 | c->score = get_c_float(car(cdr(car(l))))-cluster_mean; | 
| 479 | c->score *= c->score; | 
| 480 | // Maybe this should be divided by overall mean of set | 
| 481 | // to normalise this figure (?) | 
| 482 | |
| 483 | c->next = all_cands; | 
| 484 | all_cands = c; | 
| 485 | } | 
| 486 | |
| 487 | if (extend_selections) | 
| 488 | { | 
| 489 | // An experiment, for all candidates of the previous | 
| 490 | // item whose following is of this phone type, include | 
| 491 | // them as a candidate | 
| 492 | EST_Item *ppp = s->prev(); | 
| 493 | if (ppp) | 
| 494 | { | 
| 495 | EST_VTCandidate *lc = vtcand(ppp->f("unit_cands")); | 
| 496 | for (ccc=0 ; lc && (ccc < extend_selections); lc = lc->next) | 
| 497 | { | 
| 498 | CLunit *unit = clunit(lc->name); | 
| 499 | CLunit *next_unit; | 
| 500 | |
| 501 | if (unit->next_unit) | 
| 502 | next_unit = unit->next_unit; | 
| 503 | else | 
| 504 | continue; | 
| 505 | EST_String ss; | 
| 506 | ss = next_unit->name.before("_"); | 
| 507 | if (ss.matches(".*_.*_.*")) | 
| 508 | { | 
| 509 | ss += "_"; | 
| 510 | ss += next_unit->name.after("_").before("_"); | 
| 511 | } | 
| 512 | /* printf("%s %s\n",(const char *)ss, (const char *)lookingfor); */ | 
| 513 | for (gt=all_cands; gt; gt=gt->next) | 
| 514 | if (clunit(gt->name)->name == next_unit->name) | 
| 515 | break; /* got this one already */ | 
| 516 | if ((ss == lookingfor) && (gt == 0)) | 
| 517 | { // its the right type so add it | 
| 518 | c = new EST_VTCandidate; | 
| 519 | c->name = est_val(next_unit); | 
| 520 | cldb->load_join_coefs(next_unit); | 
| 521 | c->s = s; | 
| 522 | c->score = 0; | 
| 523 | c->next = all_cands; | 
| 524 | all_cands = c; | 
| 525 | bbb++; | 
| 526 | ccc++; | 
| 527 | } | 
| 528 | } | 
| 529 | } | 
| 530 | |
| 531 | s->set_val("unit_cands",est_val(all_cands)); | 
| 532 | } | 
| 533 | if (clunits_debug) | 
| 534 | printf("cands %d (extends %d) %s\n",bbb,ccc,(const char *)lookingfor); | 
| 535 | return all_cands; | 
| 536 | } | 
| 537 | |
| 538 | static EST_VTPath *TS_npath(EST_VTPath *p,EST_VTCandidate *c,EST_Features &f) | 
| 539 | { | 
| 540 | // Combine candidate c with previous path updating score | 
| 541 | // with join cost | 
| 542 | float cost; | 
| 543 | EST_VTPath *np = new EST_VTPath; | 
| 544 | CLunit *u0, *u1; | 
| 545 | float u0_move=0.0, u1_move=0.0; | 
| 546 | (void)f; | 
| 547 | |
| 548 | np->c = c; | 
| 549 | np->from = p; | 
| 550 | if ((p == 0) || (p->c == 0)) | 
| 551 | cost = 0; // nothing previous to join to | 
| 552 | else | 
| 553 | { | 
| 554 | u0 = clunit(p->c->name); | 
| 555 | u1 = clunit(c->name); | 
| 556 | // printf("u0 %s u1 %s\n", | 
| 557 | // (const char *)u0->name, | 
| 558 | // (const char *)u1->name); | 
| 559 | if (optimal_coupling) | 
| 560 | cost = optimal_couple(u0,u1,u0_move,u1_move, | 
| 561 | optimal_coupling, | 
| 562 | different_prev_pen, | 
| 563 | non_consecutive_pen); | 
| 564 | else // naive measure | 
| 565 | cost = naive_join_cost(u0,u1,c->s,u0_move,u1_move); | 
| 566 | // When optimal_coupling == 2 the moves will be 0, just the scores | 
| 567 | // are relevant | 
| 568 | if (optimal_coupling == 1) | 
| 569 | { | 
| 570 | np->f.set(static_unit_prev_move,u0_move); // new (prev) end | 
| 571 | np->f.set(static_unit_this_move,u1_move); // new start | 
| 572 | } | 
| 573 | } | 
| 574 | // printf("cost %f continuity_weight %f\n", cost, continuity_weight); | 
| 575 | cost *= continuity_weight; | 
| 576 | np->state = c->pos; // "state" is candidate number | 
| 577 | if (clunits_log_scores && (cost != 0)) | 
| 578 | cost = log(cost); | 
| 579 | |
| 580 | np->f.set(static_jscore,cost); | 
| 581 | np->f.set(static_tscore,c->score); | 
| 582 | if (p==0) | 
| 583 | np->score = (c->score+cost); | 
| 584 | else | 
| 585 | np->score = (c->score+cost) + p->score; | 
| 586 | np->f.set(static_cscore,np->score); | 
| 587 | |
| 588 | if (clunits_debug > 1) | 
| 589 | printf("joining cost %f\n",np->score); | 
| 590 | return np; | 
| 591 | } | 
| 592 | |
| 593 | static float optimal_couple(CLunit *u0, | 
| 594 | CLunit *u1, | 
| 595 | float &u0_move, | 
| 596 | float &u1_move, | 
| 597 | int type, | 
| 598 | float different_prev_pen, | 
| 599 | float non_consecutive_pen | 
| 600 | ) | 
| 601 | { | 
| 602 | // Find combination cost of u0 to u1, checking for best | 
| 603 | // frame up to n frames back in u0 and u1. | 
| 604 | // Note this checks the u0 with u1's predecessor, which may or may not | 
| 605 | // be of the same type | 
| 606 | // There is some optimisation here in unit coeff access | 
| 607 | EST_Track *u0_cep, *u1_p_cep; | 
| 608 | float dist, best_val; | 
| 609 | int i,eee; | 
| 610 | int u0_st, u0_end; | 
| 611 | int u1_p_st, u1_p_end; | 
| 612 | int best_u0, best_u1; | 
| 613 | CLunit *u1_p; | 
| 614 | float f; | 
| 615 | |
| 616 | u1_p = u1->prev_unit; | 
| 617 | |
| 618 | u0_move = u0->end; | 
| 619 | if (u1_p == 0) | 
| 620 | u1_move = 0; | 
| 621 | else | 
| 622 | u1_move = u1_p->end; | 
| 623 | |
| 624 | if (u1_p == u0) // they are consecutive | 
| 625 | return 0.0; | 
| 626 | if (u1_p == 0) // hacky condition, when there is no previous we'll | 
| 627 | return 0.0; // assume a good join (should be silence there) | 
| 628 | |
| 629 | if (u1_p->join_coeffs == 0) | 
| 630 | cldb->load_join_coefs(u1_p); | 
| 631 | // Get indexes into full cep for utterances rather than sub ceps | 
| 632 | u0_cep = u0->join_coeffs; | 
| 633 | u1_p_cep = u1_p->join_coeffs; | 
| 634 | |
| 635 | u0_end = u0_cep->num_frames(); | 
| 636 | u1_p_end = u1_p_cep->num_frames(); | 
| 637 | |
| 638 | if (!streq(u1_p->base_name,u0->base_name)(strcmp(u1_p->base_name,u0->base_name)==0)) | 
| 639 | { /* prev(u1) is a different phone from u0 so don't slide */ | 
| 640 | f = different_prev_pen; | 
| 641 | u0_st = u0_cep->num_frames()-1; | 
| 642 | u1_p_st = u1_p_cep->num_frames()-1; | 
| 643 | } | 
| 644 | else if (type == 2) | 
| 645 | { /* we'll only check the edge for the join */ | 
| 646 | u0_st = u0_cep->num_frames()-1; | 
| 647 | u1_p_st = u1_p_cep->num_frames()-1; | 
| 648 | f = 1; | 
| 649 | } | 
| 650 | else | 
| 651 | { | 
| 652 | u0_st = (int)(u0_cep->num_frames() * 0.33); | 
| 653 | u1_p_st = (int)(u1_p_cep->num_frames() * 0.33); | 
| 654 | f = 1; | 
| 655 | } | 
| 656 | |
| 657 | best_u0=u0_end; | 
| 658 | best_u1=u1_p_end; | 
| 659 | best_val = HUGE_VAL(__builtin_huge_val()); | 
| 660 | |
| 661 | // Here we look for the best join without sliding the windows | 
| 662 | if ((u0_end-u0_st) < (u1_p_end-u1_p_st)) | 
| 663 | eee = u0_end-u0_st; | 
| 664 | else | 
| 665 | eee = u1_p_end-u1_p_st; | 
| 666 | for (i=0; i < eee; i++) | 
| 667 | { | 
| 668 | dist = frame_distance(*u0_cep,i+u0_st, | 
| 669 | *u1_p_cep,i+u1_p_st, | 
| 670 | cldb->cweights, | 
| 671 | f0_join_weight); | 
| 672 | if (dist < best_val) | 
| 673 | { | 
| 674 | best_val = dist; | 
| 675 | best_u0 = i+u0_st; | 
| 676 | best_u1 = i+u1_p_st; | 
| 677 | } | 
| 678 | } | 
| 679 | #if 0 | 
| 680 | // This tries *all* possible matches in the pair, its slow | 
| 681 | // and has a tendency to shorten things more than you'd like | 
| 682 | // so we just use the more simple test above. | 
| 683 | int j; | 
| 684 | for (i=u0_st; i < u0_end; i++) | 
| 685 | { | 
| 686 | for (j=u1_p_st; j < u1_p_end; j++) | 
| 687 | { | 
| 688 | dist = frame_distance(*u0_cep,i, | 
| 689 | *u1_p_cep,j, | 
| 690 | cldb->cweights); | 
| 691 | if (dist < best_val) | 
| 692 | { | 
| 693 | best_val = dist; | 
| 694 | best_u0 = i; | 
| 695 | best_u1 = j; | 
| 696 | } | 
| 697 | } | 
| 698 | } | 
| 699 | #endif | 
| 700 | |
| 701 | if (type == 1) | 
| 702 | { | 
| 703 | u0_move = u0_cep->t(best_u0); | 
| 704 | u1_move = u1_p_cep->t(best_u1); | 
| 705 | } | 
| 706 | |
| 707 | return non_consecutive_pen+(best_val*f); | 
| 708 | } | 
| 709 | |
| 710 | static float naive_join_cost(CLunit *unit0, CLunit *unit1, | 
| 711 | EST_Item *s, | 
| 712 | float &u0_move, | 
| 713 | float &u1_move) | 
| 714 | { | 
| 715 | // A naive join cost, because I haven't ported the info yet | 
| 716 | |
| 717 | u0_move = unit0->end; | 
| 718 | u1_move = unit1->start; | 
| 719 | |
| 720 | if (unit0 == unit1) | 
| 721 | return 0; | 
| 722 | else if (unit1->prev_unit->name == unit0->name) | 
| 723 | return 0; | 
| 724 | else if (ph_is_silence(s->name())) | 
| 725 | return 0; | 
| 726 | else if (ph_is_stop(s->name())) | 
| 727 | return 0.2; | 
| 728 | else if (ph_is_fricative(s->name())) | 
| 729 | return 0.3; | 
| 730 | else | 
| 731 | return 1.0; | 
| 732 | } | 
| 733 | |
| 734 | static LISP cldb_load_all_coeffs(LISP filelist) | 
| 735 | { | 
| 736 | LISP f; | 
| 737 | |
| 738 | cldb = check_cldb(); | 
| 739 | for (f=filelist; f; f=cdr(f)) | 
| 740 | { | 
| 741 | cldb->get_file_coefs_sig(get_c_string(car(f))); | 
| 742 | cldb->get_file_join_coefs(get_c_string(car(f))); | 
| 743 | } | 
| 744 | |
| 745 | return NIL((struct obj *) 0); | 
| 746 | } | 
| 747 | |
// Module entry point.  Announces the "clunits" module via proclaim_module,
// passes the two global LISP cells (clunits_params, selection_trees) to
// gc_protect, then registers the utterance modules defined in this file
// (festival_def_utt_module) and the LISP-callable functions (init_subr_N),
// each with its help text.
// NOTE(review): cl_load_db, cldb_select, cldb_list,
// make_unit_distance_tables, acost_utt_load_coeffs, ac_distance_tracks and
// l_cl_mapping are not defined in this file - presumably declared in
// clunits.h; confirm.
// NOTE(review): the Clunits_Windowed_Wave help text says "hamming window",
// while clunits_windowed_wave weights samples with 0.5*(1+cos(...)).
| 748 | void festival_clunits_init(void) | 
| 749 | { | 
| 750 | // Initialization for clunits selection | 
| 751 | |
| 752 | proclaim_module("clunits", | 
| 753 | "Copyright (C) University of Edinburgh and CMU 1997-2010\n"); | 
| 754 | |
| 755 | gc_protect(&clunits_params); | 
| 756 | gc_protect(&selection_trees); | 
| 757 | |
| 758 | festival_def_utt_module("Clunits_Select",clunits_select, | 
| 759 | "(Clunits_Select UTT)\n\ | 
| 760 | Select units from current databases using cluster selection method."); | 
| 761 | |
| 762 | festival_def_utt_module("Clunits_Get_Units",clunits_get_units, | 
| 763 | "(Clunits_Get_Units UTT)\n\ | 
| 764 | Construct Unit relation from the selected units in Segment and extract\n\ | 
| 765 | their parameters from the clunit db."); | 
| 766 | |
| 767 | festival_def_utt_module("Clunits_Simple_Wave",clunits_simple_wave, | 
| 768 | "(Clunits_Simple_Wave UTT)\n\ | 
| 769 | Naively concatenate signals together into a single wave (for debugging)."); | 
| 770 | |
| 771 | festival_def_utt_module("Clunits_Windowed_Wave",clunits_windowed_wave, | 
| 772 | "(Clunits_Windowed_Wave UTT)\n\ | 
| 773 | Use hamming window over edges of units to join them, no prosodic \n\ | 
| 774 | modification though."); | 
| 775 | |
| 776 | festival_def_utt_module("Clunits_SmoothedJoin_Wave",clunits_smoothedjoin_wave, | 
| 777 | "(Clunits_SmoothedJoin_Wave UTT)\n\ | 
| 778 | smoothed join."); | 
| 779 | |
| 780 | init_subr_1("clunits:load_db",cl_load_db, | 
| 781 | "(clunits:load_db PARAMS)\n\ | 
| 782 | Load index file for cluster database and set up params, and select it."); | 
| 783 | |
| 784 | init_subr_1("clunits:select",cldb_select, | 
| 785 | "(clunits:select NAME)\n\ | 
| 786 | Select a previously loaded cluster database."); | 
| 787 | |
| 788 | init_subr_1("clunits:load_all_coefs",cldb_load_all_coeffs, | 
| 789 | "(clunits:load_all_coefs FILEIDLIST)\n\ | 
| 790 | Load in coefficients, signal and join coefficients for each named\n\ | 
| 791 | fileid. This is can be called at startup to to reduce the load time\n\ | 
| 792 | during synthesis (though may make the image large)."); | 
| 793 | |
| 794 | init_subr_0("clunits:list",cldb_list, | 
| 795 | "(clunits:list)\n\ | 
| 796 | List names of currently loaded cluster databases."); | 
| 797 | |
| 798 | init_subr_2("acost:build_disttabs",make_unit_distance_tables, | 
| 799 | "(acost:build_disttabs UTTTYPES PARAMS)\n\ | 
| 800 | Built matrices of distances between each ling_item in each each list\n\ | 
| 801 | of ling_items in uttypes. Uses acoustic weights in PARAMS and save\n\ | 
| 802 | the result as a matrix for later use."); | 
| 803 | |
| 804 | init_subr_2("acost:utt.load_coeffs",acost_utt_load_coeffs, | 
| 805 | "(acost:utt.load_coeffs UTT PARAMS)\n\ | 
| 806 | Load in the acoustic coefficients into UTT and set the Acoustic_Coeffs\n\ | 
| 807 | feature for each segment in UTT."); | 
| 808 | |
| 809 | init_subr_3("acost:file_difference",ac_distance_tracks, | 
| 810 | "(acost:file_difference FILENAME1 FILENAME2 PARAMS)\n\ | 
| 811 | Load in the two named tracks and find the acoustic difference over all\n\ | 
| 812 | based on the weights in PARAMS."); | 
| 813 | |
| 814 | init_subr_2("cl_mapping", l_cl_mapping, | 
| 815 | "(cl_mapping UTT PARAMS)\n\ | 
| 816 | Impose prosody upto some percentage, and not absolutely."); | 
| 817 | |
| 818 | } | 