| File: | modules/MultiSyn/DiphoneVoiceModule.cc |
| Location: | line 578, column 8 |
| Description: | Dereference of null pointer (loaded from variable 'utt') |
| 1 | /*************************************************************************/ | |||
| 2 | /* */ | |||
| 3 | /* Centre for Speech Technology Research */ | |||
| 4 | /* (University of Edinburgh, UK) and */ | |||
| 5 | /* Korin Richmond */ | |||
| 6 | /* Copyright (c) 2002 */ | |||
| 7 | /* All Rights Reserved. */ | |||
| 8 | /* */ | |||
| 9 | /* Permission is hereby granted, free of charge, to use and distribute */ | |||
| 10 | /* this software and its documentation without restriction, including */ | |||
| 11 | /* without limitation the rights to use, copy, modify, merge, publish, */ | |||
| 12 | /* distribute, sublicense, and/or sell copies of this work, and to */ | |||
| 13 | /* permit persons to whom this work is furnished to do so, subject to */ | |||
| 14 | /* the following conditions: */ | |||
| 15 | /* */ | |||
| 16 | /* 1. The code must retain the above copyright notice, this list of */ | |||
| 17 | /* conditions and the following disclaimer. */ | |||
| 18 | /* 2. Any modifications must be clearly marked as such. */ | |||
| 19 | /* 3. Original authors' names are not deleted. */ | |||
| 20 | /* 4. The authors' names are not used to endorse or promote products */ | |||
| 21 | /* derived from this software without specific prior written */ | |||
| 22 | /* permission. */ | |||
| 23 | /* */ | |||
| 24 | /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */ | |||
| 25 | /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ | |||
| 26 | /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT */ | |||
| 27 | /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */ | |||
| 28 | /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ | |||
| 29 | /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ | |||
| 30 | /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ | |||
| 31 | /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ | |||
| 32 | /* THIS SOFTWARE. */ | |||
| 33 | /* */ | |||
| 34 | /*************************************************************************/ | |||
| 35 | /* */ | |||
| 36 | /* Author: Korin Richmond */ | |||
| 37 | /* Date: Aug 2002 */ | |||
| 38 | /* --------------------------------------------------------------------- */ | |||
| 39 | /* A diphone unit selection "voice module" */ | |||
| 40 | /* (implemented using a list of utterance objects) */ | |||
| 41 | /*************************************************************************/ | |||
| 42 | ||||
| 43 | #include "DiphoneVoiceModule.h" | |||
| 44 | #include "EST_TargetCost.h" | |||
| 45 | #include "EST_viterbi.h" | |||
| 46 | #include "EST_rw_status.h" | |||
| 47 | #include "EST_Track.h" | |||
| 48 | #include "EST_track_aux.h" | |||
| 49 | #include "EST_Wave.h" | |||
| 50 | #include "EST_THash.h" | |||
| 51 | #include "EST_TList.h" | |||
| 52 | #include "EST_types.h" | |||
| 53 | #include "ling_class/EST_Utterance.h" | |||
| 54 | #include "siod.h" | |||
| 55 | #include "siod_est.h" | |||
| 56 | #include "safety.h" | |||
| 57 | #include <cstdlib> | |||
| 58 | ||||
| 59 | #include "EST_Val.h" | |||
| 60 | ||||
| 61 | // from src/modules/UniSyn_diphone/us_diphone.h | |||
| 62 | // this won't be staying here long... | |||
| 63 | void parse_diphone_times(EST_Relation &diphone_stream, | |||
| 64 | EST_Relation &source_lab); | |||
| 65 | ||||
// Scheme (SIOD) and EST_Val bindings. Each row below shows a registration
// macro immediately followed by the text that macro expands to.
//
// SIOD_REGISTER_CLASS(du_voicemodule,...) expands to: du_voicemodule(LISP),
// the predicate du_voicemodule_p(LISP), and siod(const DiphoneVoiceModule*).
| 66 | SIOD_REGISTER_CLASS(du_voicemodule,DiphoneVoiceModule)class DiphoneVoiceModule *du_voicemodule(LISP x) { return du_voicemodule (val(x)); } int du_voicemodule_p(LISP x) { if (val_p(x) && (val_type_du_voicemodule == val(x).type())) return (1==1); else return (1==0); } LISP siod(const class DiphoneVoiceModule *v ) { if (v == 0) return ((struct obj *) 0); else return siod(est_val (v)); } | |||
// VAL_REGISTER_CLASS(du_voicemodule,...) expands to: the val_type tag, a
// type-checked accessor from EST_Val, a deleter, and est_val() which wraps
// a pointer in an EST_Val together with that deleter.
| 67 | VAL_REGISTER_CLASS(du_voicemodule,DiphoneVoiceModule)val_type val_type_du_voicemodule="du_voicemodule"; class DiphoneVoiceModule *du_voicemodule(const EST_Val &v) { if (v.type() == val_type_du_voicemodule ) return (class DiphoneVoiceModule *)v.internal_ptr(); else ( EST_error_where = __null), (*EST_error_func)("val not of type val_type_" "du_voicemodule"); return __null; } static void val_delete_du_voicemodule (void *v) { delete (class DiphoneVoiceModule *)v; } EST_Val est_val (const class DiphoneVoiceModule *v) { return EST_Val(val_type_du_voicemodule , (void *)v,val_delete_du_voicemodule); } | |||
| 68 | ||||
// Same EST_Val registration for DiphoneCandidate (tag "diphonecandidate").
| 69 | VAL_REGISTER_CLASS(diphonecandidate,DiphoneCandidate)val_type val_type_diphonecandidate="diphonecandidate"; class DiphoneCandidate *diphonecandidate(const EST_Val &v) { if (v.type() == val_type_diphonecandidate ) return (class DiphoneCandidate *)v.internal_ptr(); else (EST_error_where = __null), (*EST_error_func)("val not of type val_type_""diphonecandidate" ); return __null; } static void val_delete_diphonecandidate(void *v) { delete (class DiphoneCandidate *)v; } EST_Val est_val( const class DiphoneCandidate *v) { return EST_Val(val_type_diphonecandidate , (void *)v,val_delete_diphonecandidate); } | |||
| 70 | ||||
| 71 | // defined in a single place to avoid inconsistency. | |||
| 72 | // Given a phone segment item, return the standard cut point | |||
| 73 | // time, calculated in the standard way. | |||
| 74 | float getJoinTime( const EST_Item *seg ) | |||
| 75 | { | |||
| 76 | float midt=0.0; | |||
| 77 | ||||
| 78 | // hack to avoid overhead of string creation and deletion | |||
| 79 | // (EST feature access should really be changed to take | |||
| 80 | // const char* instead of const EST_String& ) | |||
| 81 | static const EST_String cl_end_str( "cl_end" ); | |||
| 82 | static const EST_String dipth_str( "dipth" ); | |||
| 83 | static const EST_String start_str( "start" ); | |||
| 84 | ||||
| 85 | // work out boundary for diphone join | |||
| 86 | if( seg->f_present(cl_end_str) ) // join at cl_end point for stops | |||
| 87 | midt = seg->features().val("cl_end").Float(); | |||
| 88 | else if( seg->f_present(dipth_str) ) // join at 25% through a diphthong | |||
| 89 | midt = 0.75*seg->F(start_str) | |||
| 90 | + 0.25*seg->features().val("end").Float(); | |||
| 91 | else | |||
| 92 | midt = ( seg->F(start_str) | |||
| 93 | + seg->features().val("end").Float() ) / 2.0; | |||
| 94 | ||||
| 95 | return midt; | |||
| 96 | } | |||
| 97 | ||||
| 98 | DiphoneVoiceModule::DiphoneVoiceModule( const EST_StrList& basenames, | |||
| 99 | const EST_String& uttDir, | |||
| 100 | const EST_String& wavDir, | |||
| 101 | const EST_String& pmDir, | |||
| 102 | const EST_String& coefDir, | |||
| 103 | unsigned int sr, | |||
| 104 | const EST_String& uttExt, | |||
| 105 | const EST_String& wavExt, | |||
| 106 | const EST_String& pmExt, | |||
| 107 | const EST_String& coefExt ) | |||
| 108 | ||||
| 109 | : fileList( basenames ), | |||
| 110 | utt_dir ( uttDir ), | |||
| 111 | utt_ext ( uttExt ), | |||
| 112 | pm_dir( pmDir ), | |||
| 113 | pm_ext( pmExt ), | |||
| 114 | coef_dir( coefDir ), | |||
| 115 | coef_ext( coefExt ), | |||
| 116 | wave_dir( wavDir ), | |||
| 117 | wave_ext( wavExt ), | |||
| 118 | wav_srate( sr ), | |||
| 119 | tcdatahash ( 0 ), | |||
| 120 | utt_dbase( 0 ), | |||
| 121 | catalogue( 0 ) | |||
| 122 | { | |||
| 123 | ||||
| 124 | } | |||
| 125 | ||||
| 126 | void DiphoneVoiceModule::addCoefficients( EST_Relation *segs, const EST_Track& coefs ) | |||
| 127 | { | |||
| 128 | float startt, midt, endt; | |||
| 129 | EST_FVector *startf, *midf, *endf; | |||
| 130 | const int num_coefs = coefs.num_channels(); | |||
| 131 | ||||
| 132 | // hack to avoid overhead of string creation and deletion | |||
| 133 | // (EST feature access should really be changed to take | |||
| 134 | // const char* instead of const EST_String& ) | |||
| 135 | static const EST_String startcoef_str("startcoef"); | |||
| 136 | static const EST_String midcoef_str("midcoef"); | |||
| 137 | static const EST_String endcoef_str("endcoef"); | |||
| 138 | static const EST_String start_str("start"); | |||
| 139 | ||||
| 140 | EST_Item *seg=segs->head(); | |||
| 141 | startt = seg->F(start_str); | |||
| 142 | ||||
| 143 | startf = new EST_FVector(num_coefs); | |||
| 144 | CHECK_PTR(startf)if((startf)==0){ (EST_error_where = __null), (*EST_error_func )("memory allocation failed (file %s, line %d)", "DiphoneVoiceModule.cc" ,144);}; | |||
| 145 | coefs.copy_frame_out(coefs.index(startt), *startf); //this one not shared | |||
| 146 | ||||
| 147 | for( ; seg!=0; seg=seg->next() ){ | |||
| 148 | ||||
| 149 | // work out boundary for diphone join | |||
| 150 | midt = getJoinTime( seg ); | |||
| 151 | ||||
| 152 | // copy frames out and set as features | |||
| 153 | seg->features().set_val( startcoef_str, est_val(startf) ); | |||
| 154 | ||||
| 155 | midf = new EST_FVector(num_coefs); | |||
| 156 | CHECK_PTR(midf)if((midf)==0){ (EST_error_where = __null), (*EST_error_func)( "memory allocation failed (file %s, line %d)", "DiphoneVoiceModule.cc" ,156);}; | |||
| 157 | coefs.copy_frame_out(coefs.index(midt), *midf); | |||
| 158 | seg->features().set_val( midcoef_str, est_val(midf) ); | |||
| 159 | ||||
| 160 | endt = seg->features().val("end").Float(); | |||
| 161 | endf = new EST_FVector(num_coefs); | |||
| 162 | CHECK_PTR(endf)if((endf)==0){ (EST_error_where = __null), (*EST_error_func)( "memory allocation failed (file %s, line %d)", "DiphoneVoiceModule.cc" ,162);}; | |||
| 163 | coefs.copy_frame_out(coefs.index(endt), *endf); | |||
| 164 | seg->features().set_val( endcoef_str, est_val(endf) ); | |||
| 165 | ||||
| 166 | startf = endf; // phones share frame at phone boundary (reference counted in EST_Val) | |||
| 167 | } | |||
| 168 | } | |||
| 169 | ||||
| 170 | void DiphoneVoiceModule::flatPack( EST_Relation *segs, | |||
| 171 | const EST_TargetCost *tc ) const | |||
| 172 | { | |||
| 173 | ||||
| 174 | const EST_FlatTargetCost *ftc = (EST_FlatTargetCost *)tc; | |||
| 175 | ||||
| 176 | for( EST_Item *seg=segs->head(); seg->next() !=0; seg=seg->next() ) | |||
| 177 | tcdatahash->add_item(seg, ftc->flatpack(seg)); | |||
| 178 | ||||
| 179 | } | |||
| 180 | ||||
| 181 | void DiphoneVoiceModule::initialise( const EST_TargetCost *tc, bool ignore_bad_tag ) | |||
| 182 | { | |||
| 183 | EST_Utterance *u=0; | |||
| 184 | EST_Relation *segs=0; | |||
| 185 | ||||
| 186 | tcdatahash = new TCDataHash(500); | |||
| 187 | ||||
| 188 | utt_dbase = new EST_TList<EST_Utterance *>; | |||
| 189 | CHECK_PTR(utt_dbase)if((utt_dbase)==0){ (EST_error_where = __null), (*EST_error_func )("memory allocation failed (file %s, line %d)", "DiphoneVoiceModule.cc" ,189);}; | |||
| 190 | ||||
| 191 | catalogue = new EST_TStringHash<ItemList*>( 2500 ); | |||
| 192 | CHECK_PTR(catalogue)if((catalogue)==0){ (EST_error_where = __null), (*EST_error_func )("memory allocation failed (file %s, line %d)", "DiphoneVoiceModule.cc" ,192);}; | |||
| 193 | ||||
| 194 | int numIgnoredPhones=0; | |||
| 195 | ||||
| 196 | if(ignore_bad_tag) | |||
| 197 | EST_warning(EST_error_where = __null), (*EST_warning_func)( "Looking for bad flags"); | |||
| 198 | else | |||
| 199 | EST_warning(EST_error_where = __null), (*EST_warning_func)( "Ignoring bad flags"); | |||
| 200 | ||||
| 201 | ||||
| 202 | for( EST_Litem *it=fileList.head(); it!=0 ; it=it->next() ){ | |||
| 203 | u = new EST_Utterance; | |||
| 204 | CHECK_PTR(u)if((u)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)" , "DiphoneVoiceModule.cc",204);}; | |||
| 205 | ||||
| 206 | if( (u->load(utt_dir+fileList(it)+utt_ext)) != read_ok ) | |||
| 207 | EST_error(EST_error_where = __null), (*EST_error_func)( "Couldn't load utterance %s\n", | |||
| 208 | (const char*)fileList(it) ); | |||
| 209 | ||||
| 210 | segs = u->relation( "Segment" ); | |||
| 211 | ||||
| 212 | // add join cost coefficients (at middle of phones) | |||
| 213 | EST_Track coefs; | |||
| 214 | if( (coefs.load((coef_dir+fileList(it)+coef_ext))) != read_ok ) | |||
| 215 | EST_error(EST_error_where = __null), (*EST_error_func)( "Couldn't load data file %s", | |||
| 216 | (const char*) (coef_dir+fileList(it)+coef_ext) ); | |||
| 217 | ||||
| 218 | addCoefficients( segs, coefs ); | |||
| 219 | ||||
| 220 | if (tc->is_flatpack()) | |||
| 221 | { | |||
| 222 | flatPack(segs,tc); | |||
| 223 | u->remove_relation("Token"); | |||
| 224 | u->remove_relation("Word"); | |||
| 225 | u->remove_relation("Phrase"); | |||
| 226 | u->remove_relation("Syllable"); | |||
| 227 | u->remove_relation("SylStructure"); | |||
| 228 | u->remove_relation("IntEvent"); | |||
| 229 | u->remove_relation("Intonation"); | |||
| 230 | } | |||
| 231 | ||||
| 232 | addToCatalogue( u, &numIgnoredPhones, ignore_bad_tag ); | |||
| 233 | utt_dbase->append( u ); | |||
| 234 | } | |||
| 235 | ||||
| 236 | if(ignore_bad_tag) | |||
| 237 | EST_warning(EST_error_where = __null), (*EST_warning_func)( "Ignored %d phones with bad flag set\n", numIgnoredPhones ); | |||
| 238 | } | |||
| 239 | ||||
| 240 | DiphoneVoiceModule::~DiphoneVoiceModule() | |||
| 241 | { | |||
| 242 | if( utt_dbase != 0 ){ | |||
| 243 | EST_Litem *it = utt_dbase->head(); | |||
| 244 | for( ; it!=0 ; it=it->next() ) | |||
| 245 | delete (*utt_dbase)(it); | |||
| 246 | delete utt_dbase; | |||
| 247 | } | |||
| 248 | ||||
| 249 | delete catalogue; | |||
| 250 | ||||
| 251 | if(tcdatahash) | |||
| 252 | delete tcdatahash; | |||
| 253 | ||||
| 254 | } | |||
| 255 | ||||
| 256 | void DiphoneVoiceModule::addToCatalogue( const EST_Utterance *utt, int *num_ignored, bool ignore_bad ) | |||
| 257 | { | |||
| 258 | EST_Item *item, *next_item; | |||
| 259 | ItemList *diphoneList; | |||
| 260 | const EST_String *ph1, *ph2; | |||
| 261 | int found=0; | |||
| 262 | ||||
| 263 | static const EST_String bad_str( "bad" ); | |||
| 264 | ||||
| 265 | item = (utt->relation( "Segment" ))->tail(); | |||
| 266 | if( item!=0 ){ | |||
| 267 | ph2 = &(item->features().val("name").String()); | |||
| 268 | ||||
| 269 | while( (item=item->prev()) != 0 ){ | |||
| 270 | ||||
| 271 | next_item = item->next(); | |||
| 272 | ||||
| 273 | // You'd think we need to check both item->f_present(bad_str) and | |||
| 274 | // next_item->f_present(bad_str) like this: | |||
| 275 | //if((item->f_present(bad_str) || next_item->f_present(bad_str)) && ignore_bad == true){ | |||
| 276 | // But experiment showed that then each time one diphone too many would be | |||
| 277 | // ignored. This was partly compensated by a bug pesent up to r1.14 | |||
| 278 | // (a iteration within "if(item=item->prev()!=0)" just before the "continue") | |||
| 279 | // which caused the leftmost bad phone in a row of bad phones NOT to be ignored | |||
| 280 | // when the length of the row was even (or when it was odd and ended in the | |||
| 281 | // utterance-final phone, which is never checked for badness). | |||
| 282 | if(item->f_present(bad_str) && ignore_bad == true){ | |||
| 283 | ||||
| 284 | (*num_ignored)++; | |||
| 285 | ||||
| 286 | EST_warning(EST_error_where = __null), (*EST_warning_func)( "Ignoring diphone \"%s_%s\" (LEFT %s in %s at %fs, bad flag \"%s\")", | |||
| 287 | item->S("name").str(), | |||
| 288 | next_item->S("name").str(), | |||
| 289 | item->S("name").str(), | |||
| 290 | utt->f.S("fileid").str(), | |||
| 291 | item->F("end"), | |||
| 292 | item->S("bad").str() ); | |||
| 293 | ||||
| 294 | if(item->prev() != 0){ | |||
| 295 | continue; | |||
| 296 | } | |||
| 297 | else | |||
| 298 | break; //already at start of list, so finish up | |||
| 299 | } | |||
| 300 | ||||
| 301 | ph1 = &(item->features().val("name").String()); | |||
| 302 | ||||
| 303 | // EST_warning( "Adding phone \"%s\" (%s, %f) to diphoneList %s_%s", | |||
| 304 | // item->S("name").str(), | |||
| 305 | // utt->f.S("fileid").str(), | |||
| 306 | // item->F("end"), | |||
| 307 | // item->S("name").str(), | |||
| 308 | // next_item->S("name").str()); | |||
| 309 | ||||
| 310 | diphoneList = catalogue->val(EST_String::cat(*ph1,"_",*ph2), found); | |||
| 311 | ||||
| 312 | if( !found ){ | |||
| 313 | diphoneList = new ItemList; | |||
| 314 | CHECK_PTR(diphoneList)if((diphoneList)==0){ (EST_error_where = __null), (*EST_error_func )("memory allocation failed (file %s, line %d)", "DiphoneVoiceModule.cc" ,314);}; | |||
| 315 | catalogue->add_item(EST_String::cat(*ph1,"_",*ph2), diphoneList, 1); // no_search=1 | |||
| 316 | } | |||
| 317 | ||||
| 318 | diphoneList->append( item ); | |||
| 319 | ||||
| 320 | ph2 = ph1; | |||
| 321 | } | |||
| 322 | } | |||
| 323 | } | |||
| 324 | ||||
| 325 | void DiphoneVoiceModule::getDiphone( const EST_Item *phone1, | |||
| 326 | EST_Track* coef, EST_Wave* sig, int *midframe, | |||
| 327 | bool extendLeft, bool extendRight ) const | |||
| 328 | { | |||
| 329 | EST_Item *phone2 = phone1->next(); | |||
| 330 | ||||
| 331 | // load the relevant parts | |||
| 332 | const EST_String &fname = phone1->relation()->utt()->f.val("fileid").String(); | |||
| 333 | ||||
| 334 | static const EST_String start_str( "start" ); | |||
| 335 | ||||
| 336 | float startt,midt,endt; | |||
| 337 | ||||
| 338 | if( extendLeft==true ) | |||
| 339 | startt = phone1->F(start_str); | |||
| 340 | else | |||
| 341 | startt = getJoinTime( phone1 ); | |||
| 342 | ||||
| 343 | midt = phone1->features().val("end").Float(); | |||
| 344 | ||||
| 345 | if( extendRight==true ) | |||
| 346 | endt = phone2->features().val("end").Float(); | |||
| 347 | else | |||
| 348 | endt = getJoinTime( phone2 ); | |||
| 349 | ||||
| 350 | // get pitchmarks for pitch synchronous synthesis | |||
| 351 | EST_Track *tempcoef = new EST_Track; | |||
| 352 | CHECK_PTR(tempcoef)if((tempcoef)==0){ (EST_error_where = __null), (*EST_error_func )("memory allocation failed (file %s, line %d)", "DiphoneVoiceModule.cc" ,352);}; | |||
| 353 | if( (tempcoef->load((pm_dir+fname+pm_ext))) != read_ok ) | |||
| 354 | EST_error(EST_error_where = __null), (*EST_error_func)( "Couldn't load data file %s", | |||
| 355 | (const char*) (pm_dir+fname+pm_ext) ); | |||
| 356 | ||||
| 357 | // following few lines effectively moves segment boundaries to | |||
| 358 | // line up with pitch periods. | |||
| 359 | int copy_start = tempcoef->index( startt ); | |||
| 360 | int copy_end = tempcoef->index( endt ); | |||
| 361 | //copy_end -= 1; //so that adjacent units don't start and end with same frame | |||
| 362 | ||||
| 363 | int copy_len = copy_end - copy_start; | |||
| 364 | //int copy_len = copy_end - copy_start + 1; | |||
| 365 | ||||
| 366 | startt = tempcoef->t( copy_start ); | |||
| 367 | endt = tempcoef->t( copy_end ); | |||
| 368 | ||||
| 369 | if( copy_len == 0 ){ | |||
| 370 | EST_warning(EST_error_where = __null), (*EST_warning_func)( "%s(%f->%f): %s_%s diphone length means 1 pitchmark will be duplicated", | |||
| 371 | fname.str(), startt, endt, phone1->S("name").str(), phone2->S("name").str() ); | |||
| 372 | copy_len=1; | |||
| 373 | } | |||
| 374 | else if( copy_len < 0 ){ | |||
| 375 | EST_error(EST_error_where = __null), (*EST_error_func)( "%s(%f->%f): %s_%s diphone length renders %d pitchmark", | |||
| 376 | fname.str(), startt, endt, phone1->S("name").str(), phone2->S("name").str(), copy_len ); | |||
| 377 | } | |||
| 378 | ||||
| 379 | tempcoef->copy_sub_track( *coef, copy_start, copy_len ); | |||
| 380 | ||||
| 381 | *midframe = coef->index( midt ); | |||
| 382 | ||||
| 383 | // adjust timing, which Festival synthesis code makes assumptions about | |||
| 384 | // SPECIFICALLY, the unisyn module wants all units to start from | |||
| 385 | // the first value above 0.0 (as the first pitch mark) | |||
| 386 | float t_off = (copy_start!=0) ? tempcoef->t(copy_start-1) : 0.0; | |||
| 387 | int nframes = coef->num_frames(); | |||
| 388 | for( int i=0; i<nframes; ++i ) | |||
| 389 | coef->t(i) -= t_off; | |||
| 390 | ||||
| 391 | //start waveform at previous pitchmark (this is period approximation used) | |||
| 392 | int st_sample = (int)rint( t_off * (float) wav_srate ); | |||
| 393 | ||||
| 394 | //preferably end waveform at following pitchmark (follows convention in UniSyn module) | |||
| 395 | int end_sample; | |||
| 396 | if( copy_end < tempcoef->num_frames() ) | |||
| 397 | end_sample = (int) rint( tempcoef->t(copy_end) * (float) wav_srate ); | |||
| 398 | //if( copy_end+1 < tempcoef->num_frames() ) | |||
| 399 | // end_sample = (int) rint( tempcoef->t(copy_end+1) * (float) wav_srate ); | |||
| 400 | else{ | |||
| 401 | // estimate from previous pitch mark shift | |||
| 402 | int pp_centre_sample = (int) rint( endt * (float) wav_srate ); | |||
| 403 | int pp_first_sample = (int) rint( tempcoef->t(copy_end) * (float) wav_srate ); | |||
| 404 | //int pp_first_sample = (int) rint( tempcoef->t(copy_end-1) * (float) wav_srate ); | |||
| 405 | end_sample = (2*pp_centre_sample)-pp_first_sample; | |||
| 406 | } | |||
| 407 | ||||
| 408 | // (obviously, we would want to load and cache any files // | |||
| 409 | // which haven't been loaded yet, rather than just load // | |||
| 410 | // the parts each and every time) // | |||
| 411 | if( sig->load( wave_dir+fname+wave_ext, // | |||
| 412 | st_sample, end_sample-st_sample+1) != read_ok ) // | |||
| 413 | EST_error(EST_error_where = __null), (*EST_error_func)( "Couldn't load data file %s", // | |||
| 414 | (const char*) (wave_dir+fname+wave_ext) ); // | |||
| 415 | ||||
| 416 | delete tempcoef; | |||
| 417 | } | |||
| 418 | ||||
| 419 | ||||
| 420 | inline EST_VTCandidate* makeCandidate( const EST_Item *target_ph1, | |||
| 421 | const EST_Item *cand_ph1, | |||
| 422 | const EST_TargetCost *tc, | |||
| 423 | const TCData *tcd, | |||
| 424 | const TCDataHash *tcdatahash, | |||
| 425 | float tc_weight, | |||
| 426 | const DiphoneVoiceModule *dvm_p ) | |||
| 427 | { | |||
| 428 | // hack to avoid overhead of string creation and deletion | |||
| 429 | // (EST feature access should really be changed to take | |||
| 430 | // const char* instead of const EST_String& ) | |||
| 431 | static const EST_String extendLeft_str("extendLeft"); | |||
| 432 | static const EST_String extendRight_str("extendRight"); | |||
| 433 | static const EST_String jccid_str("jccid"); | |||
| 434 | ||||
| 435 | EST_VTCandidate *c = new EST_VTCandidate; | |||
| 436 | CHECK_PTR(c)if((c)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)" , "DiphoneVoiceModule.cc",436);}; | |||
| 437 | ||||
| 438 | EST_Item *cand_ph2 = cand_ph1->next(); | |||
| 439 | ||||
| 440 | // set up all the members we can here | |||
| 441 | c->s = const_cast<EST_Item*>(cand_ph1); | |||
| 442 | ||||
| 443 | EST_FVector *left, *right; | |||
| 444 | if( target_ph1->f_present( extendLeft_str ) ) | |||
| 445 | left = fvector( cand_ph1->features().val( "startcoef" ) ); | |||
| 446 | else | |||
| 447 | left = fvector( cand_ph1->features().val( "midcoef" ) ); | |||
| 448 | ||||
| 449 | if( target_ph1->next()->f_present( extendRight_str ) ) | |||
| 450 | right = fvector( cand_ph2->features().val( "endcoef" ) ); | |||
| 451 | else | |||
| 452 | right = fvector( cand_ph2->features().val( "midcoef" ) ); | |||
| 453 | ||||
| 454 | // an abuse of the "name" EST_Val member to store data we want instead | |||
| 455 | // of what is intended to go there | |||
| 456 | // (will become unnecessary with a more general candidate class) | |||
| 457 | DiphoneCandidate *cand = new DiphoneCandidate( cand_ph1, dvm_p, left, right ); | |||
| 458 | CHECK_PTR(cand)if((cand)==0){ (EST_error_where = __null), (*EST_error_func)( "memory allocation failed (file %s, line %d)", "DiphoneVoiceModule.cc" ,458);}; | |||
| 459 | c->name = est_val( cand ); //to get synthesis parameters (deleted by EST_Val c->name) | |||
| 460 | ||||
| 461 | if( cand_ph1->f_present( jccid_str ) ){ | |||
| 462 | cand->ph1_jccid = cand_ph1->features().val( "jccid" ).Int(); | |||
| 463 | cand->ph1_jccindex = cand_ph1->features().val( "jccindex" ).Int(); | |||
| 464 | } | |||
| 465 | ||||
| 466 | if( cand_ph2->f_present( jccid_str ) ){ | |||
| 467 | cand->ph1_jccid = cand_ph2->features().val( "jccid" ).Int(); | |||
| 468 | cand->ph1_jccindex = cand_ph2->features().val( "jccindex" ).Int(); | |||
| 469 | } | |||
| 470 | ||||
| 471 | if(tc->is_flatpack()) | |||
| 472 | c->score = tc_weight* | |||
| 473 | ((const EST_FlatTargetCost *)tc) | |||
| 474 | ->operator()( tcd, | |||
| 475 | tcdatahash->val( const_cast<EST_Item*>(cand_ph1) ) ); | |||
| 476 | else | |||
| 477 | c->score = tc_weight*tc->operator()( target_ph1, cand_ph1 ); | |||
| 478 | ||||
| 479 | ||||
| 480 | return c; | |||
| 481 | } | |||
| 482 | ||||
| 483 | inline void itemListToCandidateList( ItemList::Entries &it, | |||
| 484 | EST_VTCandidate **head, | |||
| 485 | EST_VTCandidate **tail, | |||
| 486 | const EST_Item *target_ph1, | |||
| 487 | const EST_TargetCost *tc, | |||
| 488 | const TCDataHash *tcdh, | |||
| 489 | const TCDataHash *tcdatahash, | |||
| 490 | float tc_weight, | |||
| 491 | int count, | |||
| 492 | const DiphoneVoiceModule *dvm_p ) | |||
| 493 | ||||
| 494 | { | |||
| 495 | int i=0; | |||
| 496 | ||||
| 497 | if( count > 0 ){ | |||
| 498 | TCData *tcd = tcdh->val( const_cast<EST_Item*>(target_ph1) ); | |||
| 499 | EST_VTCandidate *nextc = 0; | |||
| 500 | ||||
| 501 | // make last one first | |||
| 502 | EST_VTCandidate *c = makeCandidate( target_ph1, (*it), tc, tcd, tcdatahash, tc_weight, dvm_p ); | |||
| 503 | c->next = nextc; | |||
| 504 | *tail = c; | |||
| 505 | ||||
| 506 | // then iterate back prepending to linked list | |||
| 507 | // (order reversed because using c->next) | |||
| 508 | nextc = c; | |||
| 509 | it++; i++; | |||
| 510 | for( ; (it && i<count); it++, i++ ){ | |||
| 511 | c = makeCandidate( target_ph1, (*it), tc, tcd, tcdatahash, tc_weight, dvm_p ); | |||
| 512 | c->next = nextc; | |||
| 513 | nextc = c; | |||
| 514 | } | |||
| 515 | ||||
| 516 | *head = c; // keep hold of last one set up | |||
| 517 | } | |||
| 518 | ||||
| 519 | return; | |||
| 520 | } | |||
| 521 | ||||
| 522 | int DiphoneVoiceModule::getCandidateList( const EST_Item& target, | |||
| 523 | const EST_TargetCost* tc, | |||
| 524 | const TCDataHash *tcdh, | |||
| 525 | float tc_weight, | |||
| 526 | EST_VTCandidate **head, | |||
| 527 | EST_VTCandidate **tail ) const | |||
| 528 | { | |||
| 529 | int nfound = 0; | |||
| 530 | const EST_Item *target_ph1 = item(target.f("ph1")); | |||
| 531 | ||||
| 532 | int found = 0; | |||
| 533 | const ItemList *candidateItemList = catalogue->val( target.S("name"), found ); | |||
| 534 | if( found != 0 ){ | |||
| 535 | nfound = candidateItemList->length(); | |||
| 536 | ||||
| 537 | ItemList::Entries it = ItemList::Entries(*candidateItemList); | |||
| 538 | ||||
| 539 | itemListToCandidateList( it, | |||
| 540 | head, tail, | |||
| 541 | target_ph1, | |||
| 542 | tc, tcdh, tcdatahash, tc_weight, | |||
| 543 | nfound, this ); | |||
| 544 | } | |||
| 545 | ||||
| 546 | return nfound; | |||
| 547 | } | |||
| 548 | ||||
| 549 | ||||
| 550 | int DiphoneVoiceModule::getPhoneList( const EST_String &phone, ItemList &list ) | |||
| 551 | { | |||
| 552 | unsigned int n=0; | |||
| 553 | ||||
| 554 | if( utt_dbase != 0 ){ | |||
| 555 | for( EST_Litem *it=utt_dbase->head(); it!=0 ; it=it->next() ){ | |||
| 556 | EST_Item *ph=(*utt_dbase)(it)->relation("Segment")->head(); | |||
| 557 | for( ; ph!=0; ph=ph->next() ){ | |||
| 558 | if( ph->S("name") == phone ){ | |||
| 559 | list.append( ph ); | |||
| 560 | n++; | |||
| 561 | } | |||
| 562 | } | |||
| 563 | } | |||
| 564 | } | |||
| 565 | ||||
| 566 | return n; | |||
| 567 | } | |||
| 568 | ||||
| 569 | bool DiphoneVoiceModule::getUtterance( EST_Utterance** utt, int n ) const | |||
| 570 | { | |||
| 571 | if( n<0 || n>(utt_dbase->length()-1) ) | |||
| ||||
| 572 | EST_error(EST_error_where = __null), (*EST_error_func)( "Utterance index out of bounds" ); | |||
| 573 | ||||
| 574 | if( utt == 0 ) | |||
| 575 | EST_error(EST_error_where = __null), (*EST_error_func)( "Invalid utterance" ); | |||
| 576 | ||||
| 577 | // deep copy the utterance in question | |||
| 578 | *utt = new EST_Utterance( *(utt_dbase->nth(n)) ); | |||
| ||||
| 579 | CHECK_PTR(utt)if((utt)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)" , "DiphoneVoiceModule.cc",579);}; | |||
| 580 | ||||
| 581 | return true; | |||
| 582 | } | |||
| 583 | ||||
| 584 | ||||
| 585 | bool DiphoneVoiceModule::getUtterance( EST_Utterance **utt, | |||
| 586 | const EST_String &feat_name, | |||
| 587 | const EST_Val &value ) const | |||
| 588 | { | |||
| 589 | //search down list of utterance structures, comparing | |||
| 590 | // fileid feature. If find a match, return pointer to that | |||
| 591 | // utterance. | |||
| 592 | for( EST_Litem *it=utt_dbase->head(); it!=0 ; it=it->next() ) | |||
| 593 | if( (*utt_dbase)(it)->f.val(feat_name) == value ){ | |||
| 594 | *utt = (*utt_dbase)(it); | |||
| 595 | return true; | |||
| 596 | } | |||
| 597 | ||||
| 598 | return false; | |||
| 599 | } | |||
| 600 | ||||
| 601 | void DiphoneVoiceModule::getDiphoneCoverageStats(EST_DiphoneCoverage *dc) const | |||
| 602 | { | |||
| 603 | for( EST_Litem *it=utt_dbase->head(); it!=0 ; it=it->next() ) | |||
| 604 | dc->add_stats((*utt_dbase)(it)); | |||
| 605 | } | |||
| 606 | ||||
| 607 | ||||
| 608 | ||||
| 609 | unsigned int DiphoneVoiceModule::numUnitTypes() const | |||
| 610 | { | |||
| 611 | return catalogue ? catalogue->num_entries() : 0; | |||
| 612 | } | |||
| 613 | ||||
| 614 | unsigned int DiphoneVoiceModule::numModuleUnits() const | |||
| 615 | { | |||
| 616 | unsigned int sum=0; | |||
| 617 | ||||
| 618 | if( catalogue != 0 ){ | |||
| 619 | EST_TStringHash<ItemList*>::Entries it; | |||
| 620 | ||||
| 621 | for( it.begin( *catalogue ); it; it++ ) | |||
| 622 | sum += it->v->length(); //EST_UList.length() counts the entries :( | |||
| 623 | } | |||
| 624 | ||||
| 625 | return sum; | |||
| 626 | } | |||
| 627 | ||||
| 628 | ||||
| 629 | unsigned int DiphoneVoiceModule::numAvailableCandidates( const EST_String &unit ) const | |||
| 630 | { | |||
| 631 | int number=0; | |||
| 632 | ||||
| 633 | int found=0; | |||
| 634 | const ItemList *candidateItemList = catalogue->val( unit, found ); | |||
| 635 | ||||
| 636 | if( found > 0 ) | |||
| 637 | number = candidateItemList->length(); | |||
| 638 | ||||
| 639 | return number; | |||
| 640 | } |