Bug Summary

File: modules/MultiSyn/DiphoneUnitVoice.cc
Location: line 379, column 10
Description: Called C++ object pointer is null

Annotated Source Code

1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* (University of Edinburgh, UK) and */
5/* Korin Richmond */
6/* Copyright (c) 2002 */
7/* All Rights Reserved. */
8/* */
9/* Permission is hereby granted, free of charge, to use and distribute */
10/* this software and its documentation without restriction, including */
11/* without limitation the rights to use, copy, modify, merge, publish, */
12/* distribute, sublicense, and/or sell copies of this work, and to */
13/* permit persons to whom this work is furnished to do so, subject to */
14/* the following conditions: */
15/* */
16/* 1. The code must retain the above copyright notice, this list of */
17/* conditions and the following disclaimer. */
18/* 2. Any modifications must be clearly marked as such. */
19/* 3. Original authors' names are not deleted. */
20/* 4. The authors' names are not used to endorse or promote products */
21/* derived from this software without specific prior written */
22/* permission. */
23/* */
24/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
25/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
26/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT */
27/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
28/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
29/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
30/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
31/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
32/* THIS SOFTWARE. */
33/* */
34/*************************************************************************/
35/* */
36/* Author: Korin Richmond */
37/* Date: Aug 2002 */
38/* --------------------------------------------------------------------- */
39/* first stab at a diphone unit selection "voice" - using a list of */
40/* utterance objects */
41/*************************************************************************/
42
43#include "festival.h"
44#include "DiphoneUnitVoice.h"
45#include "DiphoneVoiceModule.h"
46#include "EST_DiphoneCoverage.h"
47#include "EST_rw_status.h"
48#include "EST_viterbi.h"
49#include "EST_Track.h"
50#include "EST_track_aux.h"
51#include "EST_Wave.h"
52#include "EST_THash.h"
53#include "EST_TList.h"
54#include "EST_types.h"
55#include "ling_class/EST_Utterance.h"
56#include "siod.h"
57#include "siod_est.h"
58#include "safety.h"
59#include <cstdlib>
60
61#include "EST_TargetCost.h"
62#include "TargetCostRescoring.h"
63#include "EST_JoinCost.h"
64#include "EST_JoinCostCache.h"
65
66#include "EST_Val.h"
67
68using namespace std;
69
70
// Scheme (SIOD) binding for ItemList. The expansion that follows the macro
// name defines itemlist(LISP), the itemlist_p(LISP) type predicate, and
// siod(const ItemList*), which maps a null pointer to a null LISP object.
71SIOD_REGISTER_TYPE(itemlist,ItemList)ItemList *itemlist(LISP x) { return itemlist(val(x)); } int itemlist_p
(LISP x) { if (val_p(x) && (val_type_itemlist == val(
x).type())) return (1==1); else return (1==0); } LISP siod(const
ItemList *v) { if (v == 0) return ((struct obj *) 0); else return
siod(est_val(v)); }
// EST_Val binding for ItemList. The expansion defines the type tag
// val_type_itemlist, itemlist(const EST_Val&) (reports an error and returns
// null when the value has a different type), and est_val(const ItemList*),
// which wraps the pointer together with a deleter that deletes the ItemList.
72VAL_REGISTER_TYPE(itemlist,ItemList)val_type val_type_itemlist="itemlist"; ItemList *itemlist(const
EST_Val &v) { if (v.type() == val_type_itemlist) return (
ItemList *)v.internal_ptr(); else (EST_error_where = __null),
(*EST_error_func)("val not of type val_type_""itemlist"); return
__null; } static void val_delete_itemlist(void *v) { delete (
ItemList *)v; } EST_Val est_val(const ItemList *v) { return EST_Val
(val_type_itemlist, (void *)v,val_delete_itemlist); }
73
74// from src/modules/UniSyn_diphone/us_diphone.h
75// this won't be staying here long...
// NOTE(review): no call to parse_diphone_times is visible in this listing;
// the calls that are visible all go to the static my_parse_diphone_times.
76void parse_diphone_times(EST_Relation &diphone_stream,
77 EST_Relation &source_lab);
78
// Scheme (SIOD) binding for DiphoneUnitVoice: du_voice(LISP), du_voice_p(LISP)
// and siod(const DiphoneUnitVoice*), with the same shape as the ItemList binding.
79SIOD_REGISTER_CLASS(du_voice,DiphoneUnitVoice)class DiphoneUnitVoice *du_voice(LISP x) { return du_voice(val
(x)); } int du_voice_p(LISP x) { if (val_p(x) && (val_type_du_voice
== val(x).type())) return (1==1); else return (1==0); } LISP
siod(const class DiphoneUnitVoice *v) { if (v == 0) return (
(struct obj *) 0); else return siod(est_val(v)); }
// EST_Val binding for DiphoneUnitVoice: type tag val_type_du_voice, the
// du_voice(const EST_Val&) accessor, and est_val(const DiphoneUnitVoice*),
// which wraps the pointer together with a deleter that deletes the voice.
80VAL_REGISTER_CLASS(du_voice,DiphoneUnitVoice)val_type val_type_du_voice="du_voice"; class DiphoneUnitVoice
*du_voice(const EST_Val &v) { if (v.type() == val_type_du_voice
) return (class DiphoneUnitVoice *)v.internal_ptr(); else (EST_error_where
= __null), (*EST_error_func)("val not of type val_type_""du_voice"
); return __null; } static void val_delete_du_voice(void *v) {
delete (class DiphoneUnitVoice *)v; } EST_Val est_val(const class
DiphoneUnitVoice *v) { return EST_Val(val_type_du_voice, (void
*)v,val_delete_du_voice); }
81
82static void my_parse_diphone_times(EST_Relation &diphone_stream,
83 EST_Relation &source_lab)
84{
85 EST_Item *s, *u;
86 float dur1, dur_u, p_time=0.0;
87
88 // NOTE: because of the extendLeft/extendRight phone join hack for missing diphones,
89 // the unit linked list *may be* shorter that the segment list.
90 //(admittedly could cause confusion)
91
92 for( s=source_lab.head(), u=diphone_stream.head(); (u!=0)&&(s!=0); u=u->next(), s=s->next()){
93 EST_Track *pm = track(u->f("coefs"));
94
95 int end_frame = pm->num_frames() - 1;
96 int mid_frame = u->I("middle_frame");
97
98 dur1 = pm->t(mid_frame);
99 dur_u = pm->t(end_frame);
100
101 s->set("end", (p_time+dur1) );
102
103 p_time += dur_u;
104 u->set("end", p_time);
105
106 if( u->f_present("extendRight") ){//because diphone squeezed out (see above)
107 s = s->next();
108 s->set("end", p_time );
109 }
110 }
111
112 if(s)
113 s->set("end", (p_time));
114}
115
116// temporary hack necessary because decoder can only take a
117// function pointer (would be better to relax this restriction in
118// the EST_Viterbi_Decoder class, or in a replacement class, rather
119// than using this hack)
// Assigned by getUnitSequence() and regetUnitSequence() before the decoder
// runs; read by the static callbacks extendPath(), getCandidatesFunction()
// and getCandidatesWithOmissionsFunction().
120static DiphoneUnitVoice *globalTempVoicePtr = 0;
121
122DiphoneUnitVoice::DiphoneUnitVoice( const EST_StrList& basenames,
123 const EST_String& uttDir,
124 const EST_String& wavDir,
125 const EST_String& pmDir,
126 const EST_String& coefDir,
127 unsigned int sr,
128 const EST_String& uttExt,
129 const EST_String& wavExt,
130 const EST_String& pmExt,
131 const EST_String& coefExt )
132 : pruning_beam( -1 ),
133 ob_pruning_beam( -1 ),
134 tc_rescoring_beam( -1 ),
135 tc_rescoring_weight( 0.0 ),
136 tc_weight( 1.0 ),
137 jc_weight( 1.0 ),
138 jc_f0_weight( 1.0 ),
139 jc_power_weight( 1.0 ),
140 jc_spectral_weight( 1.0 ),
141 prosodic_modification( 0 ),
142 wav_srate( sr ),
143 jc( 0 ),
144 jc_delete( false ),
145 tc( 0 ),
146 tc_delete( false ),
147 tcdh( 0 )
148
149{
150 // make the default voice module with the supplied parameters
151 addVoiceModule( basenames, uttDir, wavDir, pmDir, coefDir,
152 wav_srate,
153 uttExt, wavExt, pmExt, coefExt );
154
155 diphone_backoff_rules = 0;
156}
157
158void DiphoneUnitVoice::initialise( bool ignore_bad_tag )
159{
160 if( jc == 0 )
161 EST_error(EST_error_where = __null), (*EST_error_func)( "Need to set join cost calculator for voice" );
162
163 if( tc == 0 )
164 EST_error(EST_error_where = __null), (*EST_error_func)( "Need to set target cost calculator for voice" );
165
166 EST_TList<DiphoneVoiceModule*>::Entries it;
167
168 for( it.begin(voiceModules); it; it++ )
169 (*it)->initialise( tc, ignore_bad_tag );
170}
171
172bool DiphoneUnitVoice::addVoiceModule( const EST_StrList& basenames,
173 const EST_String& uttDir,
174 const EST_String& wavDir,
175 const EST_String& pmDir,
176 const EST_String& coefDir,
177 unsigned int srate,
178 const EST_String& uttExt,
179 const EST_String& wavExt,
180 const EST_String& pmExt,
181 const EST_String& coefExt )
182
183{
184 DiphoneVoiceModule *vm;
185
186 if( srate != wav_srate )
187 EST_error(EST_error_where = __null), (*EST_error_func)( "Voice samplerate: %d\nmodule samplerate: %d",
188 wav_srate, srate );
189
190 vm = new DiphoneVoiceModule( basenames, uttDir, wavDir, pmDir, coefDir,
191 srate,
192 uttExt, wavExt, pmExt, coefExt );
193 CHECK_PTR(vm)if((vm)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",193);}
;
194
195 registerVoiceModule( vm );
196
197 return true;
198}
199
200
201void DiphoneUnitVoice::registerVoiceModule( DiphoneVoiceModule *vm )
202{
203 voiceModules.append( vm );
204}
205
206
207void DiphoneUnitVoice::setJoinCost( EST_JoinCost *jcost, bool del )
208{
209 if( jc_delete == true )
210 if( jc != 0 )
211 delete jc;
212
213 jc = jcost;
214 jc_delete = del;
215}
216
217void DiphoneUnitVoice::setTargetCost( EST_TargetCost *tcost, bool del )
218{
219 if( tc_delete == true )
220 if( tc != 0 )
221 delete tc;
222
223 tc = tcost;
224 tc_delete = del;
225}
226
227
228DiphoneUnitVoice::~DiphoneUnitVoice()
229{
230 EST_TList<DiphoneVoiceModule*>::Entries it;
231
232 for( it.begin(voiceModules); it; it++ )
233 delete( *it );
234
235 if(diphone_backoff_rules)
236 delete diphone_backoff_rules;
237
238 if( jc_delete == true )
239 if( jc != 0 )
240 delete jc;
241
242 if( tc_delete == true )
243 if( tc != 0 )
244 delete tc;
245
246 if(tcdh)
247 delete tcdh;
248
249}
250
251
252void DiphoneUnitVoice::addToCatalogue( const EST_Utterance *utt )
253{
254 // needed?
255}
256
257
258void DiphoneUnitVoice::getDiphone( const EST_VTCandidate *cand,
259 EST_Track* coef, EST_Wave* sig, int *midframe,
260 bool extendLeft, bool extendRight )
261{
262 // The need for this function in this class is a bit messy, it would be far
263 // nicer just to be able to ask the Candidate itself to hand over the relevant
264 // synthesis parameters. In future, it will work that way ;)
265
266 // put there by DiphoneVoiceModule::getCandidateList
267 const DiphoneCandidate *diphcand = diphonecandidate( cand->name );
268
269 const DiphoneVoiceModule* parentModule = diphcand->dvm;
270 EST_Item *firstPhoneInDiphone = cand->s;
271
272 // need to call right getDiphone to do the actual work
273 parentModule->getDiphone( firstPhoneInDiphone, coef, sig, midframe, extendLeft, extendRight );
274}
275
276// REQUIREMENT: the unit relation must have previously been used to initialise the
277// Viterbi decoder from which the path was produced.
278void DiphoneUnitVoice::fillUnitRelation( EST_Relation *units, const EST_VTPath *path )
279{
280 EST_Item *it=units->tail();
281
282 for ( ; path != 0 && it != 0; path=path->from, it=it->prev() ){
283 EST_Track *coefs = new EST_Track;
284 CHECK_PTR(coefs)if((coefs)==0){ (EST_error_where = __null), (*EST_error_func)
("memory allocation failed (file %s, line %d)", "DiphoneUnitVoice.cc"
,284);}
;
285 EST_Wave *sig = new EST_Wave;
286 CHECK_PTR(sig)if((sig)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",286);}
;
287 int midf;
288
289 getDiphone( path->c, coefs, sig, &midf,
290 it->f_present("extendLeft"), it->f_present("extendRight"));
291
292 EST_Item *firstPhoneInDiphone = path->c->s;
293 it->set_val( "sig", est_val( sig ) );
294 it->set_val( "coefs", est_val( coefs ) );
295 it->set( "middle_frame", midf );
296 it->set( "source_utt", firstPhoneInDiphone->relation()->utt()->f.S("fileid"));
297 it->set_val( "source_ph1", est_val( firstPhoneInDiphone ));
298 it->set( "source_end", firstPhoneInDiphone->F("end"));
299 it->set( "target_cost", path->c->score );
300
301 //have to recalculate join cost as it's not currently saved anywhere
302 if( path->from == 0 )
303 it->set( "join_cost", 0.0);
304 else{
305 // join cost between right edge of left diphone and vice versa
306 const DiphoneCandidate *l_diph = diphonecandidate(path->from->c->name);
307 const DiphoneCandidate *r_diph = diphonecandidate(path->c->name);
308
309 it->set( "join_cost", (*jc)( l_diph, r_diph ) );
310 }
311 }
312}
313
314// The use of globalTempVoicePtr in this function is really just a temporary hack
315// necessary because the decoder as it stands at present can only take a function pointer
316// (would be better to relax this restriction in the EST_Viterbi_Decoder class, or in a
317// replacement class, rather than using this hack)
318// static EST_VTPath* extendPath( EST_VTPath *p, EST_VTCandidate *c,
319// EST_Features&)
320// {
321// EST_VTPath *np = new EST_VTPath;
322// CHECK_PTR(np);
323
324// if( globalTempVoicePtr ==0 )
325// EST_error( "globalTempVoicePtr is not set, can't continue" );
326
327// const EST_JoinCost &jcost = globalTempVoicePtr->getJoinCostCalculator();
328
329// np->c = c;
330// np->from = p;
331// np->state = c->pos;
332
333// if ((p == 0) || (p->c == 0))
334// np->score = c->score;
335// else{
336// // join cost between right edge of left diphone and vice versa
337// np->score = p->score + c->score + jcost( p->c->s->next(), c->s );
338// }
339// return np;
340// }
341static EST_VTPath* extendPath( EST_VTPath *p, EST_VTCandidate *c,
342 EST_Features&)
343{
344 EST_VTPath *np = new EST_VTPath;
345 CHECK_PTR(np)if((np)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",345);}
;
346
347 if( globalTempVoicePtr ==0 )
348 EST_error(EST_error_where = __null), (*EST_error_func)( "globalTempVoicePtr is not set, can't continue" );
349
350 const EST_JoinCost &jcost = globalTempVoicePtr->getJoinCostCalculator();
351
352 np->c = c;
353 np->from = p;
354 np->state = c->pos;
355
356 if ((p == 0) || (p->c == 0))
357 np->score = c->score;
358 else{
359 const DiphoneCandidate *l_diph = diphonecandidate(p->c->name);
360 const DiphoneCandidate *r_diph = diphonecandidate(c->name);
361
362 // join cost between right edge of left diphone and vice versa
363 np->score = p->score + c->score + jcost( l_diph, r_diph );
364 }
365 return np;
366}
367
368// This function is really just a temporary hack necessary because the decoder
369// as it stands at present can only take a function pointer (would be better to relax
370// this restriction in the EST_Viterbi_Decoder class, or in a replacement class, rather
371// than using this hack)
372static EST_VTCandidate* getCandidatesFunction( EST_Item *s,
373 EST_Features &f)
374{
375 DiphoneUnitVoice *duv = globalTempVoicePtr;
1
'duv' initialized here
376 if( duv==0 )
2
Assuming 'duv' is equal to null
3
Taking true branch
377 EST_error(EST_error_where = __null), (*EST_error_func)( "Candidate source voice is unset" );
378
379 return duv->getCandidates( s, f );
4
Called C++ object pointer is null
380}
381
382// Function which, given an item from the timeline relation that
383// was originally used to initialise the EST_Viterbi_Decoder
384// returns a pointer to a linked list of EST_VTCandidates
385// (this is provided to the viterbi decoder upon its construction
386// and (in)directly called by it as part of the decoding process...)
387EST_VTCandidate* DiphoneUnitVoice::getCandidates( EST_Item *s,
388 EST_Features &f) const
389{
390 EST_VTCandidate *c = 0;
391 EST_VTCandidate *moduleListHead = 0;
392 EST_VTCandidate *moduleListTail = 0;
393
394 // these objects [c/sh]ould be a parameter visible in the user's script
395 // land, and will be in future...
396
397 // tc now a member
398 // EST_DefaultTargetCost default_target_cost;
399 // EST_TargetCost *tc = &default_target_cost;
400 // or
401 // EST_SchemeTargetCost scheme_target_cost(rintern( "targetcost"));
402 // EST_TargetCost *tc = &scheme_target_cost;
403
404 EST_TList<DiphoneVoiceModule*>::Entries module_iter;
405 int nfound, total=0;
406
407 ////////////////////////////////////////////////////////////////
408 // join linked list of candidates from each module into one list
409 for( module_iter.begin(voiceModules); module_iter; module_iter++ ){
410 nfound = (*module_iter)->getCandidateList( *s,
411 tc,
412 tcdh,
413 tc_weight,
414 &moduleListHead,
415 &moduleListTail );
416 if( nfound>0 ){
417 moduleListTail->next = c;
418 c = moduleListHead;
419 total += nfound;
420 }
421 }
422
423 if( total==0 )
424 EST_error(EST_error_where = __null), (*EST_error_func)( "Couldn't find diphone %s", (const char*)s->S("name") );
425
426 if( verbosity() > 0 )
427 printf( "Number of candidates found for target \"%s\": %d\n",
428 (const char*)s->S("name"), total );
429
430 if( ! ((tc_rescoring_beam == -1.0) || (tc_rescoring_weight <= 0.0)) )
431 rescoreCandidates( c, tc_rescoring_beam, tc_rescoring_weight );
432
433 return c;
434}
435
436void DiphoneUnitVoice::diphoneCoverage(const EST_String filename) const
437{
438
439 EST_DiphoneCoverage dc;
440 EST_TList<DiphoneVoiceModule*>::Entries module_iter;
441
442 // for each module
443 for( module_iter.begin(voiceModules); module_iter; module_iter++ )
444 (*module_iter)->getDiphoneCoverageStats(&dc);
445
446 dc.print_stats(filename);
447
448}
449
450
451
452bool DiphoneUnitVoice::synthesiseWave( EST_Utterance *utt )
453{
454 getUnitSequence( utt );
455
456 return true;
457}
458
459
460
461void DiphoneUnitVoice::getUnitSequence( EST_Utterance *utt )
462{
463 EST_Relation *segs = utt->relation( "Segment" );
464 EST_Relation *units = utt->create_relation( "Unit" );
465
466 if(!tcdh)
467 tcdh = new TCDataHash(20);
468 else
469 tcdh->clear();
470
471 // Initialise the Unit relation time index for decoder
472 EST_String diphone_name;
473 EST_StrList missing_diphones;
474
475 EST_Item *it=segs->head();
476 if( it == 0 )
477 EST_error(EST_error_where = __null), (*EST_error_func)( "Segment relation is empty" );
478
479 bool extendLeftFlag = false;
480 for( ; it->next(); it=it->next() )
481 {
482 EST_String l = it->S("name");
483 EST_String r = it->next()->S("name");
484
485 EST_String diphone_name = EST_String::cat(l,"_",r);
486 EST_String orig = diphone_name;
487
488 if(tc->is_flatpack())
489 tcdh->add_item( it , ((EST_FlatTargetCost *)tc)->flatpack(it) );
490
491
492 // First attempt back off:
493 // If missing diphone is an interword diphone, insert a silence!
494 // Perceptual results say this is prefered.
495
496 if ( diphone_name != EST_String::Empty &&
497 !this->unitAvailable(diphone_name) )
498 {
499 EST_Item *s1,*s2;
500 EST_Item *w1=0,*w2=0;
501
502 cout << "Missing diphone: "<< diphone_name << endl;
503
504 if((s1 = parent(it,"SylStructure")))
505 w1= parent(s1,"SylStructure");
506 if( (s2 = parent(it->next(),"SylStructure")))
507 w2= parent(s2,"SylStructure");
508
509 if( w1 && w2 && (w1 != w2) )
510 {
511 EST_Item *sil;
512
513 cout << " Interword so inseting silence.\n";
514
515 sil = it->insert_after();
516 sil->set("name",ph_silence());
517
518 r = it->next()->S("name");
519 diphone_name = EST_String::cat(l,"_",r);
520
521 }
522 }
523
524
525 // Simple back off.
526 // Change diphone name for one we actually have.
527
528 while(diphone_name != EST_String::Empty &&
529 !this->unitAvailable(diphone_name) &&
530 diphone_backoff_rules)
531 {
532
533 cout << " diphone still missing, backing off: " << diphone_name << endl;
534
535 diphone_name = diphone_backoff_rules->backoff(l,r);
536 l = diphone_name.before("_");
537 r = diphone_name.after("_");
538
539 cout << " backed off: " << orig << " -> " << diphone_name << endl;
540
541 if( verbosity() > 0 ){
542 EST_warning(EST_error_where = __null), (*EST_warning_func)("Backing off requested diphone %s to %s",
543 orig.str(),
544 diphone_name.str() );
545 }
546 }
547
548
549 //// Complex backoff. Changes the segment stream to the right,
550 //// may still leave a discontinuity to the left. This could be
551 //// fixed, but it would requires a better search. Rob's thoughts
552 //// are that the simple method works better, unless it resorts to
553 //// a bad default rule.
554
555
556 // while(!this->unitAvailable(diphone_name) &&
557 // diphone_backoff_rules &&
558 // !diphone_backoff_rules->backoff(it))
559 // diphone_name = EST_String::cat(it->S("name"),"_",it->next()->S("name"));
560
561 if( !this->unitAvailable( diphone_name ) ){
562 missing_diphones.append( diphone_name );
563 if(units->tail())
564 units->tail()->set( "extendRight", 1 );
565 extendLeftFlag = true; // trigger for next unit to make up second half of missing diphone
566 }
567 else{
568 EST_Item *t = units->append();
569 t->set( "name", diphone_name );
570 if(orig != diphone_name)
571 t->set( "missing_diphone",orig);
572 t->set_val( "ph1", est_val(it) );
573 if( extendLeftFlag == true ){
574 t->set( "extendLeft", 1 );
575 extendLeftFlag = false;
576 }
577 }
578 }
579
580 // stop if necessary units are still missing.
581 if( missing_diphones.length() > 0 ){
582 for( EST_Litem *it=missing_diphones.head(); it!=0 ; it=it->next() )
583 printf( "requested diphone missing: %s\n", missing_diphones(it).str() );
584
585 EST_warning(EST_error_where = __null), (*EST_warning_func)("Making phone joins to compensate...");
586 // EST_error("Unable to synthesise utterance due to missing diphones");
587 }
588
589 // Make the decoder do its thing
590 // -1 means number of states at each time point not fixed
591 EST_Viterbi_Decoder v( getCandidatesFunction, extendPath, -1 );
592
593 // turn on pruning if necessary
594 if( (pruning_beam>0) || (ob_pruning_beam>0) )
595 v.set_pruning_parameters( pruning_beam, ob_pruning_beam );
596
597 // temporary hack necessary because decoder can only take a
598 // function pointer (would be better to relax this restriction in
599 // the EST_Viterbi_Decoder class, or in a replacement class, rather
600 // than using this hack)
601 globalTempVoicePtr = this;
602
603 v.set_big_is_good(false);
604
605 if( verbosity() > 0 )
606 v.turn_on_trace();
607
608 v.initialise( units );
609 v.search();
610
611 // take hold of the best path (end thereof)
612 EST_VTPath *bestp=0;
613 if( !v.result( &bestp ) )
614 EST_error(EST_error_where = __null), (*EST_error_func)( "No best candidate sequence found" );
615
616 // fill in the best path features in the Unit Relation
617 fillUnitRelation( units, bestp );
618
619 my_parse_diphone_times( *units, *segs );
620}
621
622
623/////////////////////////////////////////////////////////////////////////////////////
624// Canned example experimental code (proof of concept rather than intelligently done)
625
626static inline bool itemListContainsItem( const ItemList* il, const EST_Item *item )
627{
628 ItemList::Entries it;
629
630 for( it.begin( *il ); it; it++ )
631 if( (*it) == item )
632 return true;
633
634 return false;
635}
636
637
638static EST_VTCandidate* getCandidatesWithOmissionsFunction( EST_Item *s, EST_Features &f )
639{
640 DiphoneUnitVoice *duv = globalTempVoicePtr;
641 if( duv==0 )
642 EST_error(EST_error_where = __null), (*EST_error_func)( "Candidate source voice is unset" );
643
644 //get candidate list as usual
645 EST_VTCandidate *candlist = duv->getCandidates( s, f );
646
647 //filter out candidates on basis of omission list (yes, this is quite dumb)
648 if( s->f_present( "omitlist" ) ){
649
650 EST_warning(EST_error_where = __null), (*EST_warning_func)( "omitlist found in unit %s", s->S("name").str() );
651
652 ItemList *omitlist = itemlist( s->f("omitlist") );
653
654 //until one candidate remains as head (to keep hold of list head)
655 while( candlist != 0 && itemListContainsItem( omitlist, candlist->s ) ){
656 EST_VTCandidate *del_cand = candlist;
657 candlist = candlist->next;
658 del_cand->next = 0; //so deletion doesn't trigger total list deletion
659 delete del_cand;
660 }
661
662 //then continue down list
663 EST_VTCandidate *prev = candlist;
664 EST_VTCandidate *cand = candlist->next;
665 while( cand!=0 ){
666 if( itemListContainsItem( omitlist, cand->s ) ){ //delete cand on true
667 prev->next = cand->next;
668 cand->next = 0; //so deletion doesn't trigger total list deletion
669 delete cand;
670 cand = prev;
671 }
672 cand = cand->next;
673 }
674
675 if( candlist == 0 )
676 EST_error(EST_error_where = __null), (*EST_error_func)( "zero candidates remain after filtering" );
677
678 }
679
680 return candlist;
681}
682
683// For when the utterance already has the unit sequence, with certain candidates
684// flagged as to be avoided, or mandatory and so on...
685void DiphoneUnitVoice::regetUnitSequence( EST_Utterance *utt )
686{
687 // Unit relation should already be in existence for decoder
688 EST_Relation *units = utt->relation( "Unit" );
689 EST_Item *it=units->head();
690 if( it == 0 )
691 EST_error(EST_error_where = __null), (*EST_error_func)( "Unit relation is empty" );
692
693 // Make the decoder do its thing (again)
694 // -1 means number of states at each time point not fixed
695 EST_Viterbi_Decoder v( getCandidatesWithOmissionsFunction, extendPath, -1 );
696
697 // turn on pruning if necessary
698 if( (pruning_beam>0) || (ob_pruning_beam>0) )
699 v.set_pruning_parameters( pruning_beam, ob_pruning_beam );
700
701 // temporary hack necessary because decoder can only take a
702 // function pointer (would be better to relax this restriction in
703 // the EST_Viterbi_Decoder class, or in a replacement class, rather
704 // than using this hack)
705 globalTempVoicePtr = this;
706
707 v.set_big_is_good(false);
708
709 if( verbosity() > 0 )
710 v.turn_on_trace();
711
712 v.initialise( units );
713 v.search();
714
715 // take hold of the best path (end thereof)
716 EST_VTPath *bestp=0;
717 if( !v.result( &bestp ) )
718 EST_error(EST_error_where = __null), (*EST_error_func)( "No best candidate sequence found" );
719
720 // fill in the best path features in the Unit Relation
721 fillUnitRelation( units, bestp );
722
723 EST_Relation *segs = utt->relation("Segment");
724 my_parse_diphone_times( *units, *segs );
725}
726
727// End canned example experimental code ///////////////////////////////////////////
728///////////////////////////////////////////////////////////////////////////////////
729
730
731bool DiphoneUnitVoice::unitAvailable( const EST_String &diphone ) const
732{
733 EST_TList<DiphoneVoiceModule*>::Entries it;
734
735 for( it.begin(voiceModules); it; it++ )
736 if( (*it)->numAvailableCandidates(diphone) > 0 )
737 return true;
738
739 return false;
740}
741
742unsigned int DiphoneUnitVoice::numAvailableCandidates( const EST_String &diphone ) const
743{
744 unsigned int number = 0;
745 EST_TList<DiphoneVoiceModule*>::Entries it;
746
747 for( it.begin(voiceModules); it; it++ )
748 number += (*it)->numAvailableCandidates(diphone);
749
750 return number;
751}
752
753
754////////////////////////////////////////////////////////////////////////
755////////////////////////////////////////////////////////////////////////
756// special case of the above for utterances structures that are
757// actually in the voice database, which doesn't do any search
758// This is useful for doing copy synthesis of utterances (eg.
759// to test out resynthesis, prosodic modification and so on)
760void DiphoneUnitVoice::getCopyUnitUtterance( const EST_String &utt_fname,
761 EST_Utterance **utt_out ) const
762{
763 // need to find which, if any, voice module has this utterance
764 // in its list
765 EST_TList<DiphoneVoiceModule*>::Entries module_iter;
766 EST_Utterance *db_utt=0;
767 for( module_iter.begin(voiceModules); module_iter; module_iter++ )
768 if( (*module_iter)->getUtterance(&db_utt, "fileid", utt_fname) == true )
769 break;
770
771 if( db_utt == 0 )
772 EST_error(EST_error_where = __null), (*EST_error_func)( "Could not find Utterance %s in any voice module",
773 utt_fname.str() );
774 else{
775 // deep copy database utterance and fill in Unit relation
776 *utt_out = new EST_Utterance( *db_utt );
777 CHECK_PTR(utt_out)if((utt_out)==0){ (EST_error_where = __null), (*EST_error_func
)("memory allocation failed (file %s, line %d)", "DiphoneUnitVoice.cc"
,777);}
;
778
779 EST_Utterance myUtt( *db_utt );
780
781 cerr << myUtt.relation_present( "Segment" ) << " "
782 << myUtt.num_relations() <<endl;
783
784
785 cerr << db_utt->relation_present( "Segment" ) << " "
786 << (*utt_out)->relation_present( "Segment" ) << " "
787 << (*utt_out)->num_relations() <<endl;
788
789
790 EST_Relation *segs = (*utt_out)->relation( "Segment" );
791 EST_Relation *units = (*utt_out)->create_relation( "Unit" );
792
793 // Initialise the Unit relation + fill in necessary/suitable
794 // synthesis parameters
795 EST_String ph1, ph2;
796 EST_Item *it = segs->tail();
797 EST_Item *db_utt_seg_it = db_utt->relation( "Segment" )->tail();
798 if( it == 0 )
799 EST_error(EST_error_where = __null), (*EST_error_func)( "Segment relation is empty" );
800 else{
801 ph2 = it->S("name");
802 while( ((it=it->prev())!=0) &&
803 ((db_utt_seg_it=db_utt_seg_it->prev())!=0) ){
804 EST_Track *coefs = new EST_Track;
805 CHECK_PTR(coefs)if((coefs)==0){ (EST_error_where = __null), (*EST_error_func)
("memory allocation failed (file %s, line %d)", "DiphoneUnitVoice.cc"
,805);}
;
806 EST_Wave *sig = new EST_Wave;
807 CHECK_PTR(sig)if((sig)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",807);}
;
808 int midf;
809
810 (*module_iter)->getDiphone( db_utt_seg_it, coefs, sig, &midf );
811
812 ph1 = it->S("name");
813 EST_Item *t = units->prepend();
814 t->set( "name", EST_String::cat(ph1,"_",ph2) );
815 t->set_val( "ph1", est_val(it) );
816 t->set_val( "sig", est_val( sig ) );
817 t->set_val( "coefs", est_val( coefs ) );
818 t->set( "middle_frame", midf );
819 t->set( "source_utt", db_utt->f.S("fileid"));
820 t->set_val( "source_ph1", est_val( db_utt_seg_it ));
821 t->set( "source_end", db_utt_seg_it->F("end"));
822 t->set( "target_cost", 0.0 );
823 t->set( "join_cost", 0.0);
824
825 ph2 = ph1;
826 }
827 }
828 my_parse_diphone_times( *units, *segs );
829
830 // this is for copy synthesis, so copy actual timings
831 //for( EST_Item *seg = segs->head(); it!=0; it=it->next() )
832 //seg->set( "end", seg->F("source_end") );
833 }
834}
835
836////////////////////////////////////////////////////////////////////////
837////////////////////////////////////////////////////////////////////////
838
839
840
841unsigned int DiphoneUnitVoice::numUnitTypes() const
842{
843 //necessary?
844 return 0;
845}
846
847unsigned int DiphoneUnitVoice::numDatabaseUnits() const
848{
849 unsigned int sum=0;
850
851 EST_TList<DiphoneVoiceModule*>::Entries it;
852
853 for( it.begin( voiceModules ); it; it++ )
854 sum += (*it)->numModuleUnits();
855
856 return sum;
857}
858
859
860//////////////////////////////////////////////////////////////////////////
861
862void DiphoneUnitVoice::set_diphone_backoff(DiphoneBackoff *dbo)
863{
864 if (diphone_backoff_rules)
865 delete diphone_backoff_rules;
866 diphone_backoff_rules = dbo;
867}
868
869
870int DiphoneUnitVoice::getPhoneList( const EST_String &phone, ItemList &list )
871{
872 unsigned int n=0;
873
874 EST_TList<DiphoneVoiceModule*>::Entries it;
875 for( it.begin( voiceModules ); it; it++ )
876 n += (*it)->getPhoneList( phone, list );
877
878 return n;
879}
880
881
882
883void DiphoneUnitVoice::precomputeJoinCosts( const EST_StrList &phones, bool verbose )
884{
885 EST_StrList::Entries it;
886 for( it.begin( phones ); it; it++ ){
887 ItemList *l = new ItemList;
888 CHECK_PTR(l)if((l)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",888);}
;
889
890 unsigned int n = getPhoneList( (*it), *l );
891
892 if( verbose==true )
893 cerr << "phone " << (*it) << " " << n << " instances\n";
894
895 if( n>0 ){
896 jc->computeAndCache( *l, true ); //verbose=true
897 }
898 else
899 EST_warning(EST_error_where = __null), (*EST_warning_func)( "Phone %s not listed in voice", (*it).str() );
900
901 delete l;
902 }
903}