DiphoneUnitVoice.cc

Bug Summary

File:	modules/MultiSyn/DiphoneUnitVoice.cc
Location:	line 379, column 10
Description:	Called C++ object pointer is null

Annotated Source Code

/*************************************************************************/

/* */

/* Centre for Speech Technology Research */

/* (University of Edinburgh, UK) and */

/* Korin Richmond */

/* */

/* Permission is hereby granted, free of charge, to use and distribute */

/* this software and its documentation without restriction, including */

/* without limitation the rights to use, copy, modify, merge, publish, */

/* distribute, sublicense, and/or sell copies of this work, and to */

/* permit persons to whom this work is furnished to do so, subject to */

/* the following conditions: */

/* */

/* 1. The code must retain the above copyright notice, this list of */

/* conditions and the following disclaimer. */

/* 2. Any modifications must be clearly marked as such. */

/* 3. Original authors' names are not deleted. */

/* 4. The authors' names are not used to endorse or promote products */

/* derived from this software without specific prior written */

/* permission. */

/* */

/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */

/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */

/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT */

/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */

/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */

/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */

/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */

/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */

/* THIS SOFTWARE. */

/* */

/*************************************************************************/

/* */

/* Author: Korin Richmond */

/* Date: Aug 2002 */

/* --------------------------------------------------------------------- */

/* first stab at a diphone unit selection "voice" - using a list of */

/* utterance objects */

/*************************************************************************/

#include "festival.h"

#include "DiphoneUnitVoice.h"

#include "DiphoneVoiceModule.h"

#include "EST_DiphoneCoverage.h"

#include "EST_rw_status.h"

#include "EST_viterbi.h"

#include "EST_Track.h"

#include "EST_track_aux.h"

#include "EST_Wave.h"

#include "EST_THash.h"

#include "EST_TList.h"

#include "EST_types.h"

#include "ling_class/EST_Utterance.h"

#include "siod.h"

#include "siod_est.h"

#include "safety.h"

#include <cstdlib>

#include "EST_TargetCost.h"

#include "TargetCostRescoring.h"

#include "EST_JoinCost.h"

#include "EST_JoinCostCache.h"

#include "EST_Val.h"

using namespace std;

SIOD_REGISTER_TYPE(itemlist,ItemList)ItemList *itemlist(LISP x) { return itemlist(val(x)); } int itemlist_p
(LISP x) { if (val_p(x) && (val_type_itemlist == val(
x).type())) return (1==1); else return (1==0); } LISP siod(const
ItemList *v) { if (v == 0) return ((struct obj *) 0); else return
siod(est_val(v)); }

VAL_REGISTER_TYPE(itemlist,ItemList)val_type val_type_itemlist="itemlist"; ItemList *itemlist(const
EST_Val &v) { if (v.type() == val_type_itemlist) return (
ItemList *)v.internal_ptr(); else (EST_error_where = __null),
(*EST_error_func)("val not of type val_type_""itemlist"); return
__null; } static void val_delete_itemlist(void *v) { delete (
ItemList *)v; } EST_Val est_val(const ItemList *v) { return EST_Val
(val_type_itemlist, (void *)v,val_delete_itemlist); }

// from src/modules/UniSyn_diphone/us_diphone.h

// this won't be staying here long...

void parse_diphone_times(EST_Relation &diphone_stream,

EST_Relation &source_lab);

SIOD_REGISTER_CLASS(du_voice,DiphoneUnitVoice)class DiphoneUnitVoice *du_voice(LISP x) { return du_voice(val
(x)); } int du_voice_p(LISP x) { if (val_p(x) && (val_type_du_voice
== val(x).type())) return (1==1); else return (1==0); } LISP
siod(const class DiphoneUnitVoice *v) { if (v == 0) return (
(struct obj *) 0); else return siod(est_val(v)); }

VAL_REGISTER_CLASS(du_voice,DiphoneUnitVoice)val_type val_type_du_voice="du_voice"; class DiphoneUnitVoice
*du_voice(const EST_Val &v) { if (v.type() == val_type_du_voice
) return (class DiphoneUnitVoice *)v.internal_ptr(); else (EST_error_where
= __null), (*EST_error_func)("val not of type val_type_""du_voice"
); return __null; } static void val_delete_du_voice(void *v) {
delete (class DiphoneUnitVoice *)v; } EST_Val est_val(const class
DiphoneUnitVoice *v) { return EST_Val(val_type_du_voice, (void
*)v,val_delete_du_voice); }

static void my_parse_diphone_times(EST_Relation &diphone_stream,

EST_Relation &source_lab)

{

EST_Item *s, *u;

float dur1, dur_u, p_time=0.0;

// NOTE: because of the extendLeft/extendRight phone join hack for missing diphones,

// the unit linked list *may be* shorter that the segment list.

//(admittedly could cause confusion)

for( s=source_lab.head(), u=diphone_stream.head(); (u!=0)&&(s!=0); u=u->next(), s=s->next()){

EST_Track *pm = track(u->f("coefs"));

int end_frame = pm->num_frames() - 1;

int mid_frame = u->I("middle_frame");

dur1 = pm->t(mid_frame);

dur_u = pm->t(end_frame);

100

101

s->set("end", (p_time+dur1) );

102

103

p_time += dur_u;

104

u->set("end", p_time);

105

106

if( u->f_present("extendRight") ){//because diphone squeezed out (see above)

107

s = s->next();

108

s->set("end", p_time );

109

}

110

}

111

112

if(s)

113

s->set("end", (p_time));

114

}

115

116

// temporary hack necessary because decoder can only take a

117

// function pointer (would be better to relax this restriction in

118

// the EST_Viterbi_Decoder class, or in a replacement class, rather

119

// than using this hack)

120

static DiphoneUnitVoice *globalTempVoicePtr = 0;

121

122

DiphoneUnitVoice::DiphoneUnitVoice( const EST_StrList& basenames,

123

const EST_String& uttDir,

124

const EST_String& wavDir,

125

const EST_String& pmDir,

126

const EST_String& coefDir,

127

unsigned int sr,

128

const EST_String& uttExt,

129

const EST_String& wavExt,

130

const EST_String& pmExt,

131

const EST_String& coefExt )

132

: pruning_beam( -1 ),

133

ob_pruning_beam( -1 ),

134

tc_rescoring_beam( -1 ),

135

tc_rescoring_weight( 0.0 ),

136

tc_weight( 1.0 ),

137

jc_weight( 1.0 ),

138

jc_f0_weight( 1.0 ),

139

jc_power_weight( 1.0 ),

140

jc_spectral_weight( 1.0 ),

141

prosodic_modification( 0 ),

142

wav_srate( sr ),

143

jc( 0 ),

144

jc_delete( false ),

145

tc( 0 ),

146

tc_delete( false ),

147

tcdh( 0 )

148

149

{

150

// make the default voice module with the supplied parameters

151

addVoiceModule( basenames, uttDir, wavDir, pmDir, coefDir,

152

wav_srate,

153

uttExt, wavExt, pmExt, coefExt );

154

155

diphone_backoff_rules = 0;

156

}

157

158

void DiphoneUnitVoice::initialise( bool ignore_bad_tag )

159

{

160

if( jc == 0 )

161

EST_error(EST_error_where = __null), (*EST_error_func)( "Need to set join cost calculator for voice" );

162

163

if( tc == 0 )

164

EST_error(EST_error_where = __null), (*EST_error_func)( "Need to set target cost calculator for voice" );

165

166

EST_TList<DiphoneVoiceModule*>::Entries it;

167

168

for( it.begin(voiceModules); it; it++ )

169

(*it)->initialise( tc, ignore_bad_tag );

170

}

171

172

bool DiphoneUnitVoice::addVoiceModule( const EST_StrList& basenames,

173

const EST_String& uttDir,

174

const EST_String& wavDir,

175

const EST_String& pmDir,

176

const EST_String& coefDir,

177

unsigned int srate,

178

const EST_String& uttExt,

179

const EST_String& wavExt,

180

const EST_String& pmExt,

181

const EST_String& coefExt )

182

183

{

184

DiphoneVoiceModule *vm;

185

186

if( srate != wav_srate )

187

EST_error(EST_error_where = __null), (*EST_error_func)( "Voice samplerate: %d\nmodule samplerate: %d",

188

wav_srate, srate );

189

190

vm = new DiphoneVoiceModule( basenames, uttDir, wavDir, pmDir, coefDir,

191

srate,

192

uttExt, wavExt, pmExt, coefExt );

193

CHECK_PTR(vm)if((vm)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",193);};

194

195

registerVoiceModule( vm );

196

197

return true;

198

}

199

200

201

void DiphoneUnitVoice::registerVoiceModule( DiphoneVoiceModule *vm )

202

{

203

voiceModules.append( vm );

204

}

205

206

207

void DiphoneUnitVoice::setJoinCost( EST_JoinCost *jcost, bool del )

208

{

209

if( jc_delete == true )

210

if( jc != 0 )

211

delete jc;

212

213

jc = jcost;

214

jc_delete = del;

215

}

216

217

void DiphoneUnitVoice::setTargetCost( EST_TargetCost *tcost, bool del )

218

{

219

if( tc_delete == true )

220

if( tc != 0 )

221

delete tc;

222

223

tc = tcost;

224

tc_delete = del;

225

}

226

227

228

DiphoneUnitVoice::~DiphoneUnitVoice()

229

{

230

EST_TList<DiphoneVoiceModule*>::Entries it;

231

232

for( it.begin(voiceModules); it; it++ )

233

delete( *it );

234

235

if(diphone_backoff_rules)

236

delete diphone_backoff_rules;

237

238

if( jc_delete == true )

239

if( jc != 0 )

240

delete jc;

241

242

if( tc_delete == true )

243

if( tc != 0 )

244

delete tc;

245

246

if(tcdh)

247

delete tcdh;

248

249

}

250

251

252

void DiphoneUnitVoice::addToCatalogue( const EST_Utterance *utt )

253

{

254

// needed?

255

}

256

257

258

void DiphoneUnitVoice::getDiphone( const EST_VTCandidate *cand,

259

EST_Track* coef, EST_Wave* sig, int *midframe,

260

bool extendLeft, bool extendRight )

261

{

262

// The need for this function in this class is a bit messy, it would be far

263

// nicer just to be able to ask the Candidate itself to hand over the relevant

264

// synthesis parameters. In future, it will work that way ;)

265

266

// put there by DiphoneVoiceModule::getCandidateList

267

const DiphoneCandidate *diphcand = diphonecandidate( cand->name );

268

269

const DiphoneVoiceModule* parentModule = diphcand->dvm;

270

EST_Item *firstPhoneInDiphone = cand->s;

271

272

// need to call right getDiphone to do the actual work

273

parentModule->getDiphone( firstPhoneInDiphone, coef, sig, midframe, extendLeft, extendRight );

274

}

275

276

// REQUIREMENT: the unit relation must have previously been used to initialise the

277

// Viterbi decoder from which the path was produced.

278

void DiphoneUnitVoice::fillUnitRelation( EST_Relation *units, const EST_VTPath *path )

279

{

280

EST_Item *it=units->tail();

281

282

for ( ; path != 0 && it != 0; path=path->from, it=it->prev() ){

283

EST_Track *coefs = new EST_Track;

284

CHECK_PTR(coefs)if((coefs)==0){ (EST_error_where = __null), (*EST_error_func)
("memory allocation failed (file %s, line %d)", "DiphoneUnitVoice.cc"
,284);};

285

EST_Wave *sig = new EST_Wave;

286

CHECK_PTR(sig)if((sig)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",286);};

287

int midf;

288

289

getDiphone( path->c, coefs, sig, &midf,

290

it->f_present("extendLeft"), it->f_present("extendRight"));

291

292

EST_Item *firstPhoneInDiphone = path->c->s;

293

it->set_val( "sig", est_val( sig ) );

294

it->set_val( "coefs", est_val( coefs ) );

295

it->set( "middle_frame", midf );

296

it->set( "source_utt", firstPhoneInDiphone->relation()->utt()->f.S("fileid"));

297

it->set_val( "source_ph1", est_val( firstPhoneInDiphone ));

298

it->set( "source_end", firstPhoneInDiphone->F("end"));

299

it->set( "target_cost", path->c->score );

300

301

//have to recalculate join cost as it's not currently saved anywhere

302

if( path->from == 0 )

303

it->set( "join_cost", 0.0);

304

else{

305

// join cost between right edge of left diphone and vice versa

306

const DiphoneCandidate *l_diph = diphonecandidate(path->from->c->name);

307

const DiphoneCandidate *r_diph = diphonecandidate(path->c->name);

308

309

it->set( "join_cost", (*jc)( l_diph, r_diph ) );

310

}

311

}

312

}

313

314

// The use of the globalFunctionPtr in this function is a really just a temporary hack

315

// necessary because the decoder as it stands at present can only take a function pointer

316

// (would be better to relax this restriction in the EST_Viterbi_Decoder class, or in a

317

// replacement class, rather than using this hack)

318

// static EST_VTPath* extendPath( EST_VTPath *p, EST_VTCandidate *c,

319

// EST_Features&)

320

// {

321

// EST_VTPath *np = new EST_VTPath;

322

// CHECK_PTR(np);

323

324

// if( globalTempVoicePtr ==0 )

325

// EST_error( "globalTempVoicePtr is not set, can't continue" );

326

327

// const EST_JoinCost &jcost = globalTempVoicePtr->getJoinCostCalculator();

328

329

// np->c = c;

330

// np->from = p;

331

// np->state = c->pos;

332

333

// if ((p == 0) || (p->c == 0))

334

// np->score = c->score;

335

// else{

336

// // join cost between right edge of left diphone and vice versa

337

// np->score = p->score + c->score + jcost( p->c->s->next(), c->s );

338

// }

339

// return np;

340

// }

341

static EST_VTPath* extendPath( EST_VTPath *p, EST_VTCandidate *c,

342

EST_Features&)

343

{

344

EST_VTPath *np = new EST_VTPath;

345

CHECK_PTR(np)if((np)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",345);};

346

347

if( globalTempVoicePtr ==0 )

348

EST_error(EST_error_where = __null), (*EST_error_func)( "globalTempVoicePtr is not set, can't continue" );

349

350

const EST_JoinCost &jcost = globalTempVoicePtr->getJoinCostCalculator();

351

352

np->c = c;

353

np->from = p;

354

np->state = c->pos;

355

356

if ((p == 0) || (p->c == 0))

357

np->score = c->score;

358

else{

359

const DiphoneCandidate *l_diph = diphonecandidate(p->c->name);

360

const DiphoneCandidate *r_diph = diphonecandidate(c->name);

361

362

// join cost between right edge of left diphone and vice versa

363

np->score = p->score + c->score + jcost( l_diph, r_diph );

364

}

365

return np;

366

}

367

368

// This function is a really just a temporary hack necessary because the decoder

369

// as it stands at present can only take a function pointer (would be better to relax

370

// this restriction in the EST_Viterbi_Decoder class, or in a replacement class, rather

371

// than using this hack)

372

static EST_VTCandidate* getCandidatesFunction( EST_Item *s,

373

EST_Features &f)

374

{

375

DiphoneUnitVoice *duv = globalTempVoicePtr;

'duv' initialized here

→

376

if( duv==0 )

←

Assuming 'duv' is equal to null

→

←

Taking true branch

→

377

EST_error(EST_error_where = __null), (*EST_error_func)( "Candidate source voice is unset" );

378

379

return duv->getCandidates( s, f );

←

Called C++ object pointer is null

380

}

381

382

// Function which, given an item from the timeline relation that

383

// was originally used to initialise the EST_Viterbi_Decoder

384

// returns a pointer to a linked list of EST_VTCandidates

385

// (this is provided to the viterbi decoder upon its construction

386

// and (in)directly called by it as part of the decoding process...)

387

EST_VTCandidate* DiphoneUnitVoice::getCandidates( EST_Item *s,

388

EST_Features &f) const

389

{

390

EST_VTCandidate *c = 0;

391

EST_VTCandidate *moduleListHead = 0;

392

EST_VTCandidate *moduleListTail = 0;

393

394

// these objects [c/sh]ould be a parameter visible in the user's script

395

// land, and will be in future...

396

397

// tc now a member

398

// EST_DefaultTargetCost default_target_cost;

399

// EST_TargetCost *tc = &default_target_cost;

400

// or

401

// EST_SchemeTargetCost scheme_target_cost(rintern( "targetcost"));

402

// EST_TargetCost *tc = &scheme_target_cost;

403

404

EST_TList<DiphoneVoiceModule*>::Entries module_iter;

405

int nfound, total=0;

406

407

////////////////////////////////////////////////////////////////

408

// join linked list of candidates from each module into one list

409

for( module_iter.begin(voiceModules); module_iter; module_iter++ ){

410

nfound = (*module_iter)->getCandidateList( *s,

411

tc,

412

tcdh,

413

tc_weight,

414

&moduleListHead,

415

&moduleListTail );

416

if( nfound>0 ){

417

moduleListTail->next = c;

418

c = moduleListHead;

419

total += nfound;

420

}

421

}

422

423

if( total==0 )

424

EST_error(EST_error_where = __null), (*EST_error_func)( "Couldn't find diphone %s", (const char*)s->S("name") );

425

426

if( verbosity() > 0 )

427

printf( "Number of candidates found for target \"%s\": %d\n",

428

(const char*)s->S("name"), total );

429

430

if( ! ((tc_rescoring_beam == -1.0) || (tc_rescoring_weight <= 0.0)) )

431

rescoreCandidates( c, tc_rescoring_beam, tc_rescoring_weight );

432

433

return c;

434

}

435

436

void DiphoneUnitVoice::diphoneCoverage(const EST_String filename) const

437

{

438

439

EST_DiphoneCoverage dc;

440

EST_TList<DiphoneVoiceModule*>::Entries module_iter;

441

442

// for each module

443

for( module_iter.begin(voiceModules); module_iter; module_iter++ )

444

(*module_iter)->getDiphoneCoverageStats(&dc);

445

446

dc.print_stats(filename);

447

448

}

449

450

451

452

bool DiphoneUnitVoice::synthesiseWave( EST_Utterance *utt )

453

{

454

getUnitSequence( utt );

455

456

return true;

457

}

458

459

460

461

void DiphoneUnitVoice::getUnitSequence( EST_Utterance *utt )

462

{

463

EST_Relation *segs = utt->relation( "Segment" );

464

EST_Relation *units = utt->create_relation( "Unit" );

465

466

if(!tcdh)

467

tcdh = new TCDataHash(20);

468

else

469

tcdh->clear();

470

471

// Initialise the Unit relation time index for decoder

472

EST_String diphone_name;

473

EST_StrList missing_diphones;

474

475

EST_Item *it=segs->head();

476

if( it == 0 )

477

EST_error(EST_error_where = __null), (*EST_error_func)( "Segment relation is empty" );

478

479

bool extendLeftFlag = false;

480

for( ; it->next(); it=it->next() )

481

{

482

EST_String l = it->S("name");

483

EST_String r = it->next()->S("name");

484

485

EST_String diphone_name = EST_String::cat(l,"_",r);

486

EST_String orig = diphone_name;

487

488

if(tc->is_flatpack())

489

tcdh->add_item( it , ((EST_FlatTargetCost *)tc)->flatpack(it) );

490

491

492

// First attempt back off:

493

// If missing diphone is an interword diphone, insert a silence!

494

// Perceptual results say this is prefered.

495

496

if ( diphone_name != EST_String::Empty &&

497

!this->unitAvailable(diphone_name) )

498

{

499

EST_Item *s1,*s2;

500

EST_Item *w1=0,*w2=0;

501

502

cout << "Missing diphone: "<< diphone_name << endl;

503

504

if((s1 = parent(it,"SylStructure")))

505

w1= parent(s1,"SylStructure");

506

if( (s2 = parent(it->next(),"SylStructure")))

507

w2= parent(s2,"SylStructure");

508

509

if( w1 && w2 && (w1 != w2) )

510

{

511

EST_Item *sil;

512

513

cout << " Interword so inseting silence.\n";

514

515

sil = it->insert_after();

516

sil->set("name",ph_silence());

517

518

r = it->next()->S("name");

519

diphone_name = EST_String::cat(l,"_",r);

520

521

}

522

}

523

524

525

// Simple back off.

526

// Change diphone name for one we actually have.

527

528

while(diphone_name != EST_String::Empty &&

529

!this->unitAvailable(diphone_name) &&

530

diphone_backoff_rules)

531

{

532

533

cout << " diphone still missing, backing off: " << diphone_name << endl;

534

535

diphone_name = diphone_backoff_rules->backoff(l,r);

536

l = diphone_name.before("_");

537

r = diphone_name.after("_");

538

539

cout << " backed off: " << orig << " -> " << diphone_name << endl;

540

541

if( verbosity() > 0 ){

542

EST_warning(EST_error_where = __null), (*EST_warning_func)("Backing off requested diphone %s to %s",

543

orig.str(),

544

diphone_name.str() );

545

}

546

}

547

548

549

//// Complex backoff. Changes the segment stream to the right,

550

//// may still leave a discontinuity to the left. This could be

551

//// fixed, but it would requires a better search. Rob's thoughts

552

//// are that the simple method works better, unless it resorts to

553

//// a bad default rule.

554

555

556

// while(!this->unitAvailable(diphone_name) &&

557

// diphone_backoff_rules &&

558

// !diphone_backoff_rules->backoff(it))

559

// diphone_name = EST_String::cat(it->S("name"),"_",it->next()->S("name"));

560

561

if( !this->unitAvailable( diphone_name ) ){

562

missing_diphones.append( diphone_name );

563

if(units->tail())

564

units->tail()->set( "extendRight", 1 );

565

extendLeftFlag = true; // trigger for next unit to make up second half of missing diphone

566

}

567

else{

568

EST_Item *t = units->append();

569

t->set( "name", diphone_name );

570

if(orig != diphone_name)

571

t->set( "missing_diphone",orig);

572

t->set_val( "ph1", est_val(it) );

573

if( extendLeftFlag == true ){

574

t->set( "extendLeft", 1 );

575

extendLeftFlag = false;

576

}

577

}

578

}

579

580

// stop if necessary units are still missing.

581

if( missing_diphones.length() > 0 ){

582

for( EST_Litem *it=missing_diphones.head(); it!=0 ; it=it->next() )

583

printf( "requested diphone missing: %s\n", missing_diphones(it).str() );

584

585

EST_warning(EST_error_where = __null), (*EST_warning_func)("Making phone joins to compensate...");

586

// EST_error("Unable to synthesise utterance due to missing diphones");

587

}

588

589

// Make the decoder do its thing

590

// -1 means number of states at each time point not fixed

591

EST_Viterbi_Decoder v( getCandidatesFunction, extendPath, -1 );

592

593

// turn on pruning if necessary

594

if( (pruning_beam>0) || (ob_pruning_beam>0) )

595

v.set_pruning_parameters( pruning_beam, ob_pruning_beam );

596

597

// temporary hack necessary because decoder can only take a

598

// function pointer (would be better to relax this restriction in

599

// the EST_Viterbi_Decoder class, or in a replacement class, rather

600

// than using this hack)

601

globalTempVoicePtr = this;

602

603

v.set_big_is_good(false);

604

605

if( verbosity() > 0 )

606

v.turn_on_trace();

607

608

v.initialise( units );

609

v.search();

610

611

// take hold of the best path (end thereof)

612

EST_VTPath *bestp=0;

613

if( !v.result( &bestp ) )

614

EST_error(EST_error_where = __null), (*EST_error_func)( "No best candidate sequence found" );

615

616

// fill in the best path features in the Unit Relation

617

fillUnitRelation( units, bestp );

618

619

my_parse_diphone_times( *units, *segs );

620

}

621

622

623

/////////////////////////////////////////////////////////////////////////////////////

624

// Canned example experimental code (proof of concept rather than intelligently done)

625

626

static inline bool itemListContainsItem( const ItemList* il, const EST_Item *item )

627

{

628

ItemList::Entries it;

629

630

for( it.begin( *il ); it; it++ )

631

if( (*it) == item )

632

return true;

633

634

return false;

635

}

636

637

638

static EST_VTCandidate* getCandidatesWithOmissionsFunction( EST_Item *s, EST_Features &f )

639

{

640

DiphoneUnitVoice *duv = globalTempVoicePtr;

641

if( duv==0 )

642

EST_error(EST_error_where = __null), (*EST_error_func)( "Candidate source voice is unset" );

643

644

//get candidate list as usual

645

EST_VTCandidate *candlist = duv->getCandidates( s, f );

646

647

//filter out candidates on basis of omission list (yes, this is quite dumb)

648

if( s->f_present( "omitlist" ) ){

649

650

EST_warning(EST_error_where = __null), (*EST_warning_func)( "omitlist found in unit %s", s->S("name").str() );

651

652

ItemList *omitlist = itemlist( s->f("omitlist") );

653

654

//until one candidate remains as head (to keep hold of list head)

655

while( candlist != 0 && itemListContainsItem( omitlist, candlist->s ) ){

656

EST_VTCandidate *del_cand = candlist;

657

candlist = candlist->next;

658

del_cand->next = 0; //so deletion doesn't trigger total list deletion

659

delete del_cand;

660

}

661

662

//then continue down list

663

EST_VTCandidate *prev = candlist;

664

EST_VTCandidate *cand = candlist->next;

665

while( cand!=0 ){

666

if( itemListContainsItem( omitlist, cand->s ) ){ //delete cand on true

667

prev->next = cand->next;

668

cand->next = 0; //so deletion doesn't trigger total list deletion

669

delete cand;

670

cand = prev;

671

}

672

cand = cand->next;

673

}

674

675

if( candlist == 0 )

676

EST_error(EST_error_where = __null), (*EST_error_func)( "zero candidates remain after filtering" );

677

678

}

679

680

return candlist;

681

}

682

683

// For when the utterance already has the unit sequence, with certain candidates

684

// flagged as to be avoided, or mandatory and so on...

685

void DiphoneUnitVoice::regetUnitSequence( EST_Utterance *utt )

686

{

687

// Unit relation should already be in existence for decoder

688

EST_Relation *units = utt->relation( "Unit" );

689

EST_Item *it=units->head();

690

if( it == 0 )

691

EST_error(EST_error_where = __null), (*EST_error_func)( "Unit relation is empty" );

692

693

// Make the decoder do its thing (again)

694

// -1 means number of states at each time point not fixed

695

EST_Viterbi_Decoder v( getCandidatesWithOmissionsFunction, extendPath, -1 );

696

697

// turn on pruning if necessary

698

if( (pruning_beam>0) || (ob_pruning_beam>0) )

699

v.set_pruning_parameters( pruning_beam, ob_pruning_beam );

700

701

// temporary hack necessary because decoder can only take a

702

// function pointer (would be better to relax this restriction in

703

// the EST_Viterbi_Decoder class, or in a replacement class, rather

704

// than using this hack)

705

globalTempVoicePtr = this;

706

707

v.set_big_is_good(false);

708

709

if( verbosity() > 0 )

710

v.turn_on_trace();

711

712

v.initialise( units );

713

v.search();

714

715

// take hold of the best path (end thereof)

716

EST_VTPath *bestp=0;

717

if( !v.result( &bestp ) )

718

EST_error(EST_error_where = __null), (*EST_error_func)( "No best candidate sequence found" );

719

720

// fill in the best path features in the Unit Relation

721

fillUnitRelation( units, bestp );

722

723

EST_Relation *segs = utt->relation("Segment");

724

my_parse_diphone_times( *units, *segs );

725

}

726

727

// End canned example experimental code ///////////////////////////////////////////

728

///////////////////////////////////////////////////////////////////////////////////

729

730

731

bool DiphoneUnitVoice::unitAvailable( const EST_String &diphone ) const

732

{

733

EST_TList<DiphoneVoiceModule*>::Entries it;

734

735

for( it.begin(voiceModules); it; it++ )

736

if( (*it)->numAvailableCandidates(diphone) > 0 )

737

return true;

738

739

return false;

740

}

741

742

unsigned int DiphoneUnitVoice::numAvailableCandidates( const EST_String &diphone ) const

743

{

744

unsigned int number = 0;

745

EST_TList<DiphoneVoiceModule*>::Entries it;

746

747

for( it.begin(voiceModules); it; it++ )

748

number += (*it)->numAvailableCandidates(diphone);

749

750

return number;

751

}

752

753

754

////////////////////////////////////////////////////////////////////////

755

////////////////////////////////////////////////////////////////////////

756

// special case of the above for utterances structures that are

757

// actually in the voice database, which doesn't do any search

758

// This is useful for doing copy synthesis of utterances (eg.

759

// to test out resynthesis, prosodic modification and so on)

760

void DiphoneUnitVoice::getCopyUnitUtterance( const EST_String &utt_fname,

761

EST_Utterance **utt_out ) const

762

{

763

// need to find which, if any, voice module has this utterance

764

// in its list

765

EST_TList<DiphoneVoiceModule*>::Entries module_iter;

766

EST_Utterance *db_utt=0;

767

for( module_iter.begin(voiceModules); module_iter; module_iter++ )

768

if( (*module_iter)->getUtterance(&db_utt, "fileid", utt_fname) == true )

769

break;

770

771

if( db_utt == 0 )

772

EST_error(EST_error_where = __null), (*EST_error_func)( "Could not find Utterance %s in any voice module",

773

utt_fname.str() );

774

else{

775

// deep copy database utterance and fill in Unit relation

776

*utt_out = new EST_Utterance( *db_utt );

777

CHECK_PTR(utt_out)if((utt_out)==0){ (EST_error_where = __null), (*EST_error_func
)("memory allocation failed (file %s, line %d)", "DiphoneUnitVoice.cc"
,777);};

778

779

EST_Utterance myUtt( *db_utt );

780

781

cerr << myUtt.relation_present( "Segment" ) << " "

782

<< myUtt.num_relations() <<endl;

783

784

785

cerr << db_utt->relation_present( "Segment" ) << " "

786

<< (*utt_out)->relation_present( "Segment" ) << " "

787

<< (*utt_out)->num_relations() <<endl;

788

789

790

EST_Relation *segs = (*utt_out)->relation( "Segment" );

791

EST_Relation *units = (*utt_out)->create_relation( "Unit" );

792

793

// Initialise the Unit relation + fill in necessary/suitable

794

// synthesis parameters

795

EST_String ph1, ph2;

796

EST_Item *it = segs->tail();

797

EST_Item *db_utt_seg_it = db_utt->relation( "Segment" )->tail();

798

if( it == 0 )

799

EST_error(EST_error_where = __null), (*EST_error_func)( "Segment relation is empty" );

800

else{

801

ph2 = it->S("name");

802

while( ((it=it->prev())!=0) &&

803

((db_utt_seg_it=db_utt_seg_it->prev())!=0) ){

804

EST_Track *coefs = new EST_Track;

805

CHECK_PTR(coefs)if((coefs)==0){ (EST_error_where = __null), (*EST_error_func)
("memory allocation failed (file %s, line %d)", "DiphoneUnitVoice.cc"
,805);};

806

EST_Wave *sig = new EST_Wave;

807

CHECK_PTR(sig)if((sig)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",807);};

808

int midf;

809

810

(*module_iter)->getDiphone( db_utt_seg_it, coefs, sig, &midf );

811

812

ph1 = it->S("name");

813

EST_Item *t = units->prepend();

814

t->set( "name", EST_String::cat(ph1,"_",ph2) );

815

t->set_val( "ph1", est_val(it) );

816

t->set_val( "sig", est_val( sig ) );

817

t->set_val( "coefs", est_val( coefs ) );

818

t->set( "middle_frame", midf );

819

t->set( "source_utt", db_utt->f.S("fileid"));

820

t->set_val( "source_ph1", est_val( db_utt_seg_it ));

821

t->set( "source_end", db_utt_seg_it->F("end"));

822

t->set( "target_cost", 0.0 );

823

t->set( "join_cost", 0.0);

824

825

ph2 = ph1;

826

}

827

}

828

my_parse_diphone_times( *units, *segs );

829

830

// this is for copy synthesis, so copy actual timings

831

//for( EST_Item *seg = segs->head(); it!=0; it=it->next() )

832

//seg->set( "end", seg->F("source_end") );

833

}

834

}

835

836

////////////////////////////////////////////////////////////////////////

837

////////////////////////////////////////////////////////////////////////

838

839

840

841

unsigned int DiphoneUnitVoice::numUnitTypes() const

842

{

843

//necessary?

844

return 0;

845

}

846

847

unsigned int DiphoneUnitVoice::numDatabaseUnits() const

848

{

849

unsigned int sum=0;

850

851

EST_TList<DiphoneVoiceModule*>::Entries it;

852

853

for( it.begin( voiceModules ); it; it++ )

854

sum += (*it)->numModuleUnits();

855

856

return sum;

857

}

858

859

860

//////////////////////////////////////////////////////////////////////////

861

862

void DiphoneUnitVoice::set_diphone_backoff(DiphoneBackoff *dbo)

863

{

864

if (diphone_backoff_rules)

865

delete diphone_backoff_rules;

866

diphone_backoff_rules = dbo;

867

}

868

869

870

int DiphoneUnitVoice::getPhoneList( const EST_String &phone, ItemList &list )

871

{

872

unsigned int n=0;

873

874

EST_TList<DiphoneVoiceModule*>::Entries it;

875

for( it.begin( voiceModules ); it; it++ )

876

n += (*it)->getPhoneList( phone, list );

877

878

return n;

879

}

880

881

882

883

void DiphoneUnitVoice::precomputeJoinCosts( const EST_StrList &phones, bool verbose )

884

{

885

EST_StrList::Entries it;

886

for( it.begin( phones ); it; it++ ){

887

ItemList *l = new ItemList;

888

CHECK_PTR(l)if((l)==0){ (EST_error_where = __null), (*EST_error_func)("memory allocation failed (file %s, line %d)"
, "DiphoneUnitVoice.cc",888);};

889

890

unsigned int n = getPhoneList( (*it), *l );

891

892

if( verbose==true )

893

cerr << "phone " << (*it) << " " << n << " instances\n";

894

895

if( n>0 ){

896

jc->computeAndCache( *l, true ); //verbose=true

897

}

898

else

899

EST_warning(EST_error_where = __null), (*EST_warning_func)( "Phone %s not listed in voice", (*it).str() );

900

901

delete l;

902

}

903

}