me_mlsa.cc

Bug Summary

File:	modules/clustergen/me_mlsa.cc
Location:	line 568, column 15
Description:	Array access (from variable 'mc') results in a null pointer dereference

Annotated Source Code

/**

* The HMM-Based Speech Synthesis System (HTS)

* HTS Working Group

* Department of Computer Science

* Nagoya Institute of Technology

* and

* Interdisciplinary Graduate School of Science and Engineering

* Tokyo Institute of Technology

* Permission is hereby granted, free of charge, to use and

* distribute this software and its documentation without

* restriction, including without limitation the rights to use,

* copy, modify, merge, publish, distribute, sublicense, and/or

* sell copies of this work, and to permit persons to whom this

* work is furnished to do so, subject to the following conditions:

* 1. The source code must retain the above copyright notice,

* this list of conditions and the following disclaimer.

* 2. Any modifications to the source code must be clearly

* marked as such.

* 3. Redistributions in binary form must reproduce the above

* copyright notice, this list of conditions and the

* following disclaimer in the documentation and/or other

* materials provided with the distribution. Otherwise, one

* must contact the HTS working group.

* NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSTITUTE OF TECHNOLOGY,

* HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM

* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL

* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT

* SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSTITUTE OF

* TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE

* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY

* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,

* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTUOUS

* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR

* PERFORMANCE OF THIS SOFTWARE.

* This software was translated to C for use within Festival to offer

* multi-excitation MLSA

* Alan W Black (awb@cs.cmu.edu) 3rd April 2009
*/

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <math.h>

#include <EST_walloc.h>

#include "festival.h"

#include "mlsa_resynthesis.h"

/**

* Synthesis of speech out of speech parameters.

* Mixed excitation MLSA vocoder.

* Java port and extension of HTS engine version 2.0

* Extension: mixed excitation

* @author Marcela Charfuelan

* And ported to C by Alan W Black (awb@cs.cmu.edu)
*/

#define booleanint int

#define true1 1

#define false0 0

/* Settings block handed to initVocoder() and htsMLSAVocoder(). */
typedef struct HTSData_struct {
    int rate;       /* copied into the file-level `rate` by initVocoder() */
    int fperiod;    /* copied into the file-level frame shift `fprd` */
    double rhos;
    int stage;      /* Gamma = -1/stage; stage == 0 means Gamma = 0 */
    double alpha;
    double beta;
    booleanint useLogGain;
    double uf;
    booleanint algnst;          /* use state level alignment for duration */
    booleanint algnph;          /* use phoneme level alignment for duration */
    booleanint useMixExc;       /* use Mixed Excitation */
    booleanint useFourierMag;   /* use Fourier magnitudes for pulse generation */
    booleanint useGV;           /* use global variance in parameter generation */
    booleanint useGmmGV;        /* use global variance as a Gaussian Mixture Model */
    booleanint useUnitDurationContinuousFeature; /* for using external duration, so it will not be generated from HMMs */
    booleanint useUnitLogF0ContinuousFeature;    /* for using external f0, so it will not be generated from HMMs */

    /* Variables for controlling generation of speech in the vocoder.
     * These variables have default values but can be fixed and read from
     * the audio effects component. [Default][min--max] */
    double length;          /* total number of frames for generated speech;
                               length of generated speech (in seconds) [N/A][0.0--30.0] */
    double durationScale;   /* less than 1.0 is faster and more than 1.0 is slower, min=0.1 max=3.0 */

    booleanint LogGain;     /* copied into the file-level `use_log_gain` by initVocoder() */
    char *PdfStrFile;
    char *PdfMagFile;

    int NumFilters;         /* number of rows in MixFilters */
    int OrderFilters;       /* number of entries in each MixFilters row */
    double **MixFilters;    /* MixFilters[NumFilters][OrderFilters] */
    double F0Std;           /* variable for f0 control, multiply f0 [1.0][0.0--5.0] */
    double F0Mean;          /* variable for f0 control, add f0 [0.0][0.0--100.0] */
} HTSData;

114

115

#if 0

116

typedef struct HTSData_struct {

117

118

int rate = 16000;

119

int fperiod = 80;

120

double rhos = 0.0;

121

122

int stage = 0;

123

double alpha = 0.42;

124

booleanint useLogGain = false0;

125

double uf = 0.5;

126

booleanint algnst = false0; /* use state level alignment for duration */

127

booleanint algnph = false0; /* use phoneme level alignment for duration */

128

booleanint useMixExc = true1; /* use Mixed Excitation */

129

booleanint useFourierMag = false0; /* use Fourier magnitudes for pulse generation */

130

booleanint useGV = false0; /* use global variance in parameter generation */

131

booleanint useGmmGV = false0; /* use global variance as a Gaussian Mixture Model */

132

booleanint useUnitDurationContinuousFeature = false0; /* for using external duration, so it will not be generated from HMMs*/

133

booleanint useUnitLogF0ContinuousFeature = false0; /* for using external f0, so it will not be generated from HMMs*/

134

135

/** variables for controling generation of speech in the vocoder

136

* these variables have default values but can be fixed and read from the

137

* audio effects component. [Default][min--max] */

138

double f0Std = 1.0; /* variable for f0 control, multiply f0 [1.0][0.0--5.0] */

139

double f0Mean = 0.0; /* variable for f0 control, add f0 [0.0][0.0--100.0] */

140

double length = 0.0; /* total number of frame for generated speech */

141

/* length of generated speech (in seconds) [N/A][0.0--30.0] */

142

double durationScale = 1.0; /* less than 1.0 is faster and more than 1.0 is slower, min=0.1 max=3.0 */

143

144

} HTSData;

145

#endif

146

147

static int IPERIOD = 1;

148

static booleanint GAUSS = true1;

149

static int PADEORDER = 5; /* pade order for MLSA filter */

150

static int IRLENG = 96; /* length of impulse response */

151

152

/* for MGLSA filter (mel-generalised log spectrum approximation filter) */

153

static booleanint NORMFLG1 = true1;

154

static booleanint NORMFLG2 = false0;

155

static booleanint MULGFLG1 = true1;

156

static booleanint MULGFLG2 = false0;

157

static booleanint NGAIN = false0;

158

159

static double ZERO = 1.0e-10; /* ~(0) */

160

static double LZERO = (-1.0e+10); /* ~log(0) */

161

162

static int stage; /* Gamma=-1/stage : if stage=0 then Gamma=0 */

163

static double xgamma; /* Gamma */

164

static booleanint use_log_gain; /* log gain flag (for LSP) */

165

static int fprd; /* frame shift */

166

static int iprd; /* interpolation period */

167

static booleanint gauss; /* flag to use Gaussian noise */

168

static double p1; /* used in excitation generation */

169

static double pc; /* used in excitation generation */

170

static double *pade; /* used in mlsadf */

171

static int ppade; /* offset for vector ppade */

172

173

static double *C; /* used in the MLSA/MGLSA filter */

174

static double *CC; /* used in the MLSA/MGLSA filter */

175

static double *CINC; /* used in the MLSA/MGLSA filter */

176

static double *D1; /* used in the MLSA/MGLSA filter */

177

static int CINC_length, CC_length, C_length, D1_length;

178

179

static double rate;

180

static int pt1; /* used in mlsadf1 */

181

static int pt2; /* used in mlsadf2 */

182

static int *pt3; /* used in mlsadf2 */

183

184

/* mixed excitation variables */

185

static int numM; /* Number of bandpass filters for mixed excitation */

186

static int orderM; /* Order of filters for mixed excitation */

187

static double **h; /* filters for mixed excitation */

188

static double *xpulseSignal; /* the size of this should be orderM */

189

static double *xnoiseSignal; /* the size of this should be orderM */

190

static booleanint mixedExcitation = false0;

191

static booleanint fourierMagnitudes = false0;

192

193

static booleanint lpcVocoder = false0; /* true if lpc vocoder is used, then the input should be lsp parameters */

194

195

void initVocoder(int mcep_order, int mcep_vsize, HTSData *htsData);

196

int htsMLSAVocoder(EST_Track *lf0Pst,

197

EST_Track *mcepPst,

198

EST_Track *strPst,

199

EST_Track *magPst,

200

int *voiced,

201

HTSData *htsData,

202

EST_Wave *wave);

203

204

205

/*
 * Resynthesizes a wave from the given track with mixed excitation.
 *
 * ltrack  LISP-wrapped EST_Track. For every frame, channel 0 is read as
 *         F0 (a value > 0 marks the frame as voiced) and channels 1..25
 *         are copied as the 25 mel-cepstral values.
 * strack  LISP-wrapped EST_Track. Channels 0..4 of every frame are copied
 *         as the five strength values.
 *
 * Reads the SIOD variables mlsa_alpha_param, mlsa_beta_param and
 * me_mix_filters. Returns a LISP-wrapped EST_Wave; when ltrack is NULL
 * or the string "nil" the wave is empty (1 channel, 16000 Hz).
 */
LISP me_mlsa_resynthesis(LISP ltrack, LISP strack)
{
    EST_Track *t;
    EST_Track *str_track;
    EST_Wave *wave = 0;
    EST_Track *mcep;
    EST_Track *f0v;
    EST_Track *str;
    EST_Track *mag;
    int *voiced;
    int sr = 16000;
    int i,j;
    double shift;
    HTSData htsData;

    htsData.alpha = 0.42;
    htsData.beta = 0.0;

    if ((ltrack == NULL) ||
        (TYPEP(ltrack,tc_string) &&
         streq(get_c_string(ltrack),"nil")))
        return siod(new EST_Wave(0,1,sr));

    t = track(ltrack);
    str_track = track(strack);

    /* NOTE(review): nothing here checks that t has at least 26 channels or
     * that str_track has as many frames as t -- confirm the callers
     * guarantee this. */
    f0v = new EST_Track(t->num_frames(),1);
    mcep = new EST_Track(t->num_frames(),25);
    str = new EST_Track(t->num_frames(),5);
    mag = new EST_Track(t->num_frames(),10);
    voiced = walloc(int,t->num_frames());

    for (i=0; i<t->num_frames(); i++)
    {
        f0v->a(i) = t->a(i,0);
        if (f0v->a(i) > 0)
            voiced[i] = 1;
        else
            voiced[i] = 0;
        for (j=1; j<26; j++)
            mcep->a(i,j-1) = t->a(i,j);

        for (j=0; j<5; j++)
        {
            str->a(i,j) = str_track->a(i,j);
        }
        /* printf("awb_debug str %d 0 %f 1 %f 2 %f 3 %f 4 %f\n",
           i,str->a(i,0),str->a(i,1),str->a(i,2),str->a(i,3),str->a(i,4));*/
#if 0
        for (j=57; j<66; j++)
            mag->a(i,j-57) = t->a(i,j);
#endif
    }

    /* Frame shift in milliseconds; computed but not used further down. */
    if (t->num_frames() > 1)
        shift = 1000.0*(t->t(1)-t->t(0));
    else
        shift = 5.0;
    (void)shift;

    htsData.alpha = FLONM(siod_get_lval("mlsa_alpha_param",
                                        "mlsa: mlsa_alpha_param not set"));
    htsData.beta = FLONM(siod_get_lval("mlsa_beta_param",
                                       "mlsa: mlsa_beta_param not set"));
    htsData.stage = 0;
    htsData.LogGain = false0;
    htsData.fperiod = 80;
    htsData.rate = 16000;
    htsData.rhos = 0.0;

    htsData.uf = 0.5;
    htsData.algnst = false0;        /* use state level alignment for duration */
    htsData.algnph = false0;        /* use phoneme level alignment for duration */
    htsData.useMixExc = true1;      /* use Mixed Excitation */
    htsData.useFourierMag = false0; /* use Fourier magnitudes for pulse generation */
    htsData.useGV = false0;         /* use global variance in parameter generation */
    htsData.useGmmGV = false0;      /* use global variance as a Gaussian Mixture Model */
    htsData.useUnitDurationContinuousFeature = false0; /* for using external duration, so it will not be generated from HMMs*/
    htsData.useUnitLogF0ContinuousFeature = false0;    /* for using external f0, so it will not be generated from HMMs*/

    /** variables for controlling generation of speech in the vocoder
     * these variables have default values but can be fixed and read from the
     * audio effects component. [Default][min--max] */
    htsData.F0Std = 1.0;        /* variable for f0 control, multiply f0 [1.0][0.0--5.0] */
    htsData.F0Mean = 0.0;       /* variable for f0 control, add f0 [0.0][0.0--100.0] */
    htsData.length = 0.0;       /* total number of frame for generated speech */
                                /* length of generated speech (in seconds) [N/A][0.0--30.0] */
    htsData.durationScale = 1.0; /* less than 1.0 is faster and more than 1.0 is slower, min=0.1 max=3.0 */

    /* me_mix_filters is a flat list; it is cut into NumFilters rows of
     * equal length (any remainder of the list is ignored). */
    LISP filters = siod_get_lval("me_mix_filters",
                                 "mlsa: me_mix_filters not set");
    LISP f;
    int fl;
    htsData.NumFilters = 5;
    for (fl=0,f=filters; f; fl++)
        f=cdr(f);
    htsData.OrderFilters = fl/htsData.NumFilters;
    htsData.MixFilters = walloc(double *,htsData.NumFilters);
    for (i=0; i < htsData.NumFilters; i++)
    {
        htsData.MixFilters[i] = walloc(double,htsData.OrderFilters);
        for (j=0; j<htsData.OrderFilters; j++)
        {
            htsData.MixFilters[i][j] = FLONM(car(filters));
            filters = cdr(filters);
        }
    }

    wave = new EST_Wave(0,1,sr);

    if (mcep->num_frames() > 0)
        /* mcep_order and number of deltas */
        htsMLSAVocoder(f0v,mcep,str,mag,voiced,&htsData,wave);

    delete f0v;
    delete mcep;
    delete str;
    delete mag;
    /* voiced was obtained with walloc(), so it must go back through
     * wfree(); releasing it with `delete` mixes allocators. */
    wfree(voiced);
    /* NOTE(review): htsData.MixFilters and its rows are not released here.
     * Whether that is safe to do depends on whether htsMLSAVocoder keeps
     * the pointer beyond the call -- confirm before freeing. */

    return siod(wave);
}

327

328

/** The initialisation of VocoderSetup should be done when there is already

329

* information about the number of feature vectors to be processed,

330

* size of the mcep vector file, etc. */

331

void initVocoder(int mcep_order, int mcep_vsize, HTSData *htsData)

332

{

333

int vector_size;

334

double xrand;

335

336

stage = htsData->stage;

337

if(stage != 0)

338

xgamma = -1.0 / stage;

339

else

340

xgamma = 0.0;

341

use_log_gain = htsData->LogGain;

342

343

fprd = htsData->fperiod;

344

rate = htsData->rate;

345

iprd = IPERIOD;

346

gauss = GAUSS;

347

348

/* XXX */

349

xrand = rand();

350

351

if(stage == 0 ){ /* for MCP */

352

353

/* mcep_order=74 and pd=PADEORDER=5 (if no HTS_EMBEDDED is used) */

354

vector_size = (mcep_vsize * ( 3 + PADEORDER) + 5 * PADEORDER + 6) - (3 * (mcep_order+1));

355

CINC_length = CC_length = C_length = mcep_order+1;

356

D1_length = vector_size;

357

C = walloc(double,C_length)((double *)safe_walloc(sizeof(double)*(C_length)));

358

CC = walloc(double,CC_length)((double *)safe_walloc(sizeof(double)*(CC_length)));

359

CINC = walloc(double,CINC_length)((double *)safe_walloc(sizeof(double)*(CINC_length)));

360

D1 = walloc(double,D1_length)((double *)safe_walloc(sizeof(double)*(D1_length)));

361

362

vector_size=21;

363

pade = walloc(double,vector_size)((double *)safe_walloc(sizeof(double)*(vector_size)));

364

/* ppade is a copy of pade in mlsadf() function : ppade = &( pade[pd*(pd+1)/2] ); */

365

ppade = PADEORDER*(PADEORDER+1)/2; /* offset for vector pade */

366

pade[0] = 1.0;

367

pade[1] = 1.0;

368

pade[2] = 0.0;

369

pade[3] = 1.0;

370

pade[4] = 0.0;

371

pade[5] = 0.0;

372

pade[6] = 1.0;

373

pade[7] = 0.0;

374

pade[8] = 0.0;

375

pade[9] = 0.0;

376

pade[10] = 1.0;

377

pade[11] = 0.4999273;

378

pade[12] = 0.1067005;

379

pade[13] = 0.01170221;

380

pade[14] = 0.0005656279;

381

pade[15] = 1.0;

382

pade[16] = 0.4999391;

383

pade[17] = 0.1107098;

384

pade[18] = 0.01369984;

385

pade[19] = 0.0009564853;

386

pade[20] = 0.00003041721;

387

388

pt1 = PADEORDER+1;

389

pt2 = ( 2 * (PADEORDER+1)) + (PADEORDER * (mcep_order+2));

390

pt3 = new int[PADEORDER+1];

391

for(int i=PADEORDER; i>=1; i--)

392

pt3[i] = ( 2 * (PADEORDER+1)) + ((i-1)*(mcep_order+2));

393

394

} else { /* for LSP */

395

vector_size = ((mcep_vsize+1) * (stage+3)) - ( 3 * (mcep_order+1));

396

CINC_length = CC_length = C_length = mcep_order+1;

397

D1_length = vector_size;

398

C = walloc(double,C_length)((double *)safe_walloc(sizeof(double)*(C_length)));

399

CC = walloc(double,CC_length)((double *)safe_walloc(sizeof(double)*(CC_length)));

400

CINC = walloc(double,CINC_length)((double *)safe_walloc(sizeof(double)*(CINC_length)));

401

D1 = walloc(double,D1_length)((double *)safe_walloc(sizeof(double)*(D1_length)));

402

}

403

404

/* excitation initialisation */

405

p1 = -1;

406

pc = 0.0;

407

408

} /* method initVocoder */

409

410

411

412

/**

413

* HTS_MLSA_Vocoder: Synthesis of speech out of mel-cepstral coefficients.

414

* This procedure uses the parameters generated in pdf2par stored in:

415

* PStream mceppst: Mel-cepstral coefficients

416

* PStream strpst : Filter bank stregths for mixed excitation

417

* PStream magpst : Fourier magnitudes ( OJO!! this is not used yet)

418

* PStream lf0pst : Log F0
*/

419

420

#if 0

421

AudioInputStream htsMLSAVocoder(HTSParameterGeneration pdf2par, HMMData htsData)

422

{

423

float sampleRate = 16000.0F; //8000,11025,16000,22050,44100

424

int sampleSizeInBits = 16; //8,16

425

int channels = 1; //1,2

426

booleanint signed = true1; //true,false

427

booleanint bigEndian = false0; //true,false

428

AudioFormat af = new AudioFormat(

429

sampleRate,

430

sampleSizeInBits,

431

channels,

432

signed,

433

bigEndian);

434

double [] audio_double = NULL__null;

435

436

audio_double = htsMLSAVocoder(pdf2par.getlf0Pst(), pdf2par.getMcepPst(), pdf2par.getStrPst(), pdf2par.getMagPst(),

437

pdf2par.getVoicedArray(), htsData);

438

439

long lengthInSamples = (audio_double.length * 2 ) / (sampleSizeInBits/8);

440

logger.info("length in samples=" + lengthInSamples );

441

442

/* Normalise the signal before return, this will normalise between 1 and -1 */

443

double MaxSample = MathUtils.getAbsMax(audio_double);

444

for (int i=0; i<audio_double.length; i++)

445

audio_double[i] = 0.3 * ( audio_double[i] / MaxSample );

446

447

DDSAudioInputStream oais = new DDSAudioInputStream(new BufferedDoubleDataSource(audio_double), af);

448

return oais;

449

450

451

} /* method htsMLSAVocoder() */

452

#endif

453

454

/** mlsafir: one filter section working on the slice of d that starts at
 * index _pt3. Feeds x into the slice, updates entries 1..m, returns the
 * sum of slice[i]*b[i] for i = 2..m and then shifts entries 1..m up by
 * one place (slice[m+1] is written, so the slice needs m+2 entries). */
static double mlsafir(double x, double *b, int m, double a, double aa, double *d, int _pt3 )
{
    double *w = d + _pt3;   /* the section's own delay line */
    double acc = 0.0;
    int k;

    w[0] = x;
    w[1] = aa * w[0] + ( a * w[1] );

    /* w[k-1] has already been updated when w[k] is computed;
       w[k+1] is still the old value. */
    for (k = 2; k <= m; k++)
        w[k] += a * ( w[k+1] - w[k-1] );

    for (k = 2; k <= m; k++)
        acc += w[k] * b[k];

    /* shift the delay line, highest index first */
    for (k = m + 1; k > 1; k--)
        w[k] = w[k-1];

    return acc;
}

476

477

/** mlsdaf1: sub functions for MLSA filter */

478

static double mlsadf1(double x, double *b, int m, double a, double aa, double *d)

479

{

480

double v;

481

double out = 0.0;

482

int i;

483

//pt1 --> pt = &d1[pd+1]

484

485

for(i=PADEORDER; i>=1; i--) {

486

d[i] = aa * d[pt1+i-1] + a * d[i];

487

d[pt1+i] = d[i] * b[1];

488

v = d[pt1+i] * pade[ppade+i];

489

490

//x += (1 & i) ? v : -v;

491

if(i == 1 || i == 3 || i == 5)

492

x += v;

493

else

494

x += -v;

495

out += v;

496

}

497

d[pt1+0] = x;

498

out += x;

499

500

return(out);

501

502

}

503

504

/** mlsdaf2: sub functions for MLSA filter */

505

static double mlsadf2(double x, double *b, int m, double a, double aa, double *d)

506

{

507

double v;

508

double out = 0.0;

509

int i;

510

// pt2 --> pt = &d1[pd * (m+2)]

511

// pt3 --> pt = &d1[ 2*(pd+1) ]

512

513

for(i=PADEORDER; i>=1; i--) {

514

d[pt2+i] = mlsafir(d[(pt2+i)-1], b, m, a, aa, d, pt3[i]);

515

v = d[pt2+i] * pade[ppade+i];

516

517

if(i == 1 || i == 3 || i == 5)

518

x += v;

519

else

520

x += -v;

521

out += v;

522

523

}

524

d[pt2+0] = x;

525

out += x;

526

527

return out;

528

}

529

530

/** mlsadf: HTS Mel Log Spectrum Approximation filter.
 * Passes x through mlsadf1() with order m and then through mlsadf2()
 * with order m-1, both sharing the state vector d. */
static double mlsadf(double x, double *b, int m, double a, double aa, double *d)
{
    const double after_first = mlsadf1(x, b, m, a, aa, d);

    return mlsadf2(after_first, b, m-1, a, aa, d);
}

539

540

541

/** uniform_rand: generate uniformly distributed random numbers 1 or -1.
 * Draws one value from rand() and returns 1.0 when it lies in the upper
 * half of [0, RAND_MAX], otherwise -1.0. */
static double uniformRand()
{
    const double draw = rand();

    return (draw >= RAND_MAX/2.0) ? 1.0 : -1.0;
}

552

553

/** mc2b: transform mel-cepstrum to MLSA digital filter coefficients.
 * b[m] = mc[m]; b[k] = mc[k] - a*b[k+1] for k = m-1 .. 0.
 * Both arrays hold m+1 entries. */
static void mc2b(double *mc, double *b, int m, double a )
{
    int k = m;

    b[k] = mc[k];
    while (--k >= 0)
        b[k] = mc[k] - a * b[k+1];
}

562

563

/** b2mc: transform MLSA digital filter coefficients to mel-cepstrum.
 * Inverse of mc2b(): mc[m] = b[m]; mc[k] = b[k] + a*b[k+1] for
 * k = m-1 .. 0. Both arrays hold m+1 entries and must not be NULL. */
static void b2mc(double *b, double *mc, int m, double a)
{
    double next_b;  /* b[k+1], remembered so that b and mc may be the same array */
    double value;
    int k;

    next_b = mc[m] = b[m];
    /* Start at m-1: the top coefficient is a plain copy and must not be
       run through the recursion again. */
    for (k = m - 1; k >= 0; k--) {
        value = b[k] + (a * next_b);
        next_b = b[k];
        mc[k] = value;
    }
}

575

576

577

/** freqt: frequency transformation */

578

//private void freqt(double c1[], int m1, int cepIndex, int m2, double a){

579

static void freqt(double *c1, int m1, double *c2, int m2, double a)

580

{

581

double *freqt_buff=NULL__null; /* used in freqt */

582

int freqt_size=0; /* buffer size for freqt */

583

int i, j;

584

double b = 1 - a * a;

585

int g; /* offset of freqt_buff */

586

587

if(m2 > freqt_size) {

588

freqt_buff = walloc(double,m2 + m2 + 2)((double *)safe_walloc(sizeof(double)*(m2 + m2 + 2)));

589

freqt_size = m2;

590

}

591

g = freqt_size +1;

592

593

for(i = 0; i < m2+1; i++)

594

freqt_buff[g+i] = 0.0;

595

596

for(i = -m1; i <= 0; i++){

597

if(0 <= m2 )

598

freqt_buff[g+0] = c1[-i] + a * (freqt_buff[0] = freqt_buff[g+0]);

599

if(1 <= m2)

600

freqt_buff[g+1] = b * freqt_buff[0] + a * (freqt_buff[1] = freqt_buff[g+1]);

601

602

for(j=2; j<=m2; j++)

603

freqt_buff[g+j] = freqt_buff[j-1] + a * ( (freqt_buff[j] = freqt_buff[g+j]) - freqt_buff[g+j-1]);

604

605

}

606

607

/* move memory */

608

for(i=0; i<m2+1; i++)

609

c2[i] = freqt_buff[g+i];

610

611

if (freqt_buff)

612

wfree(freqt_buff);

613

614

}

615

616

/** c2ir: The minimum phase impulse response is evaluated from the minimum phase cepstrum */

617

static void c2ir(double *c, int nc, double *hh, int leng )

618

{

619

int n, k, upl;

620

double d;

621

622

hh[0] = exp(c[0]);

623

for(n = 1; n < leng; n++) {

624

d = 0;

625

upl = (n >= nc) ? nc - 1 : n;

626

for(k = 1; k <= upl; k++ )

627

d += k * c[k] * hh[n - k];

628

hh[n] = d / n;

629

}

630

}

631

632

/** b2en: functions for postfiltering */

633

static double b2en(double *b, int m, double a)

634

{

635

double *spectrum2en_buff=NULL__null; /* used in spectrum2en */

'spectrum2en_buff' initialized to a null pointer value

→

636

int spectrum2en_size=0; /* buffer size for spectrum2en */

637

double en = 0.0;

638

int i;

639

double *cep, *ir;

640

641

if(spectrum2en_size < m) {

←

Assuming 'spectrum2en_size' is >= 'm'

→

←

Taking false branch

→

642

spectrum2en_buff = walloc(double,(m+1) + 2 * IRLENG)((double *)safe_walloc(sizeof(double)*((m+1) + 2 * IRLENG)));

643

spectrum2en_size = m;

644

}

645

cep = walloc(double,(m+1) + 2 * IRLENG)((double *)safe_walloc(sizeof(double)*((m+1) + 2 * IRLENG))); /* CHECK! these sizes!!! */

646

ir = walloc(double,(m+1) + 2 * IRLENG)((double *)safe_walloc(sizeof(double)*((m+1) + 2 * IRLENG)));

647

648

b2mc(b, spectrum2en_buff, m, a);

←

Passing null pointer value via 2nd parameter 'mc'

→

←

Calling 'b2mc'

→

649

/* freqt(vs->mc, m, vs->cep, vs->irleng - 1, -a);*/

650

freqt(spectrum2en_buff, m, cep, IRLENG-1, -a);

651

/* HTS_c2ir(vs->cep, vs->irleng, vs->ir, vs->irleng); */

652

c2ir(cep, IRLENG, ir, IRLENG);

653

en = 0.0;

654

655

for(i = 0; i < IRLENG; i++)

656

en += ir[i] * ir[i];

657

658

if (spectrum2en_buff)

659

wfree(spectrum2en_buff);

660

wfree(cep);

661

wfree(ir);

662

663

return(en);

664

}

665

666

/** ignorm: inverse gain normalization */

667

static void ignorm(double *c1, double *c2, int m, double ng)

668

{

669

double k;

670

int i;

671

if(ng != 0.0 ) {

672

k = pow(c1[0], ng);

673

for(i=m; i>=1; i--)

674

c2[i] = k * c1[i];

675

c2[0] = (k - 1.0) / ng;

676

} else {

677

/* movem */

678

for(i=1; i<m; i++)

679

c2[i] = c1[i];

680

c2[0] = log(c1[0]);

681

}

682

}

683

684

/** ignorm: gain normalization */

685

static void gnorm(double *c1, double *c2, int m, double g)

686

{

687

double k;

688

int i;

689

if(g != 0.0) {

690

k = 1.0 + g * c1[0];

691

for(; m>=1; m--)

692

c2[m] = c1[m] / k;

693

c2[0] = pow(k, 1.0 / g);

694

} else {

695

/* movem */

696

for(i=1; i<=m; i++)

697

c2[i] = c1[i];

698

c2[0] = exp(c1[0]);

699

}

700

701

}

702

703

/** lsp2lpc: transform LSP to LPC. lsp[1..m] --> a=lpc[0..m] a[0]=1.0 */

704

static void lsp2lpc(double *lsp, double *a, int m)

705

{

706

double *lsp2lpc_buff=NULL__null; /* used in lsp2lpc */

707

int lsp2lpc_size=0; /* buffer size of lsp2lpc */

708

int i, k, mh1, mh2, flag_odd;

709

double xx, xf, xff;

710

int p, q; /* offsets of lsp2lpc_buff */

711

int a0, a1, a2, b0, b1, b2; /* offsets of lsp2lpc_buff */

712

713

flag_odd = 0;

714

if(m % 2 == 0)

715

mh1 = mh2 = m / 2;

716

else {

717

mh1 = (m+1) / 2;

718

mh2 = (m-1) / 2;

719

flag_odd = 1;

720

}

721

722

if(m > lsp2lpc_size){

723

lsp2lpc_buff = walloc(double,5 * m + 6)((double *)safe_walloc(sizeof(double)*(5 * m + 6)));

724

lsp2lpc_size = m;

725

}

726

727

/* offsets of lsp2lpcbuff */

728

p = m;

729

q = p + mh1;

730

a0 = q + mh2;

731

a1 = a0 + (mh1 +1);

732

a2 = a1 + (mh1 +1);

733

b0 = a2 + (mh1 +1);

734

b1 = b0 + (mh2 +1);

735

b2 = b1 + (mh2 +1);

736

737

/* move lsp -> lsp2lpc_buff */

738

for(i=0; i<m; i++)

739

lsp2lpc_buff[i] = lsp[i+1];

740

741

for (i = 0; i < mh1 + 1; i++)

742

lsp2lpc_buff[a0 + i] = 0.0;

743

for (i = 0; i < mh1 + 1; i++)

744

lsp2lpc_buff[a1 + i] = 0.0;

745

for (i = 0; i < mh1 + 1; i++)

746

lsp2lpc_buff[a2 + i] = 0.0;

747

for (i = 0; i < mh2 + 1; i++)

748

lsp2lpc_buff[b0 + i] = 0.0;

749

for (i = 0; i < mh2 + 1; i++)

750

lsp2lpc_buff[b1 + i] = 0.0;

751

for (i = 0; i < mh2 + 1; i++)

752

lsp2lpc_buff[b2 + i] = 0.0;

753

754

/* lsp filter parameters */

755

for (i = k = 0; i < mh1; i++, k += 2)

756

lsp2lpc_buff[p + i] = -2.0 * cos(lsp2lpc_buff[k]);

757

for (i = k = 0; i < mh2; i++, k += 2)

758

lsp2lpc_buff[q + i] = -2.0 * cos(lsp2lpc_buff[k + 1]);

759

760

/* impulse response of analysis filter */

761

xx = 1.0;

762

xf = xff = 0.0;

763

764

for (k = 0; k <= m; k++) {

765

if (flag_odd == 1) {

766

lsp2lpc_buff[a0 + 0] = xx;

767

lsp2lpc_buff[b0 + 0] = xx - xff;

768

xff = xf;

769

xf = xx;

770

} else {

771

lsp2lpc_buff[a0 + 0] = xx + xf;

772

lsp2lpc_buff[b0 + 0] = xx - xf;

773

xf = xx;

774

}

775

776

for (i = 0; i < mh1; i++) {

777

lsp2lpc_buff[a0 + i + 1] = lsp2lpc_buff[a0 + i] + lsp2lpc_buff[p + i] * lsp2lpc_buff[a1 + i] + lsp2lpc_buff[a2 + i];

778

lsp2lpc_buff[a2 + i] = lsp2lpc_buff[a1 + i];

779

lsp2lpc_buff[a1 + i] = lsp2lpc_buff[a0 + i];

780

}

781

782

for (i = 0; i < mh2; i++) {

783

lsp2lpc_buff[b0 + i + 1] = lsp2lpc_buff[b0 + i] + lsp2lpc_buff[q + i] * lsp2lpc_buff[b1 + i] + lsp2lpc_buff[b2 + i];

784

lsp2lpc_buff[b2 + i] = lsp2lpc_buff[b1 + i];

785

lsp2lpc_buff[b1 + i] = lsp2lpc_buff[b0 + i];

786

}

787

788

if (k != 0)

789

a[k - 1] = -0.5 * (lsp2lpc_buff[a0 + mh1] + lsp2lpc_buff[b0 + mh2]);

790

xx = 0.0;

791

}

792

793

for (i = m - 1; i >= 0; i--)

794

a[i + 1] = -a[i];

795

a[0] = 1.0;

796

797

if (lsp2lpc_buff)

798

wfree(lsp2lpc_buff);

799

}

800

801

/** gc2gc: generalized cepstral transformation */

802

static void gc2gc(double *c1, int m1, double g1, double *c2, int m2, double g2)

803

{

804

double *gc2gc_buff=NULL__null; /* used in gc2gc */

805

int gc2gc_size=0; /* buffer size for gc2gc */

806

int i, min, k, mk;

807

double ss1, ss2, cc;

808

809

if( m1 > gc2gc_size ) {

810

gc2gc_buff = walloc(double,m1 + 1)((double *)safe_walloc(sizeof(double)*(m1 + 1))); /* check if these buffers should be created all the time */

811

gc2gc_size = m1;

812

}

813

814

/* movem*/

815

for(i=0; i<(m1+1); i++)

816

gc2gc_buff[i] = c1[i];

817

818

c2[0] = gc2gc_buff[0];

819

820

for( i=1; i<=m2; i++){

821

ss1 = ss2 = 0.0;

822

min = m1 < i ? m1 : i - 1;

823

for(k=1; k<=min; k++){

824

mk = i - k;

825

cc = gc2gc_buff[k] * c2[mk];

826

ss2 += k * cc;

827

ss1 += mk * cc;

828

}

829

830

if(i <= m1)

831

c2[i] = gc2gc_buff[i] + (g2 * ss2 - g1 * ss1) / i;

832

else

833

c2[i] = (g2 * ss2 - g1 * ss1) / i;

834

}

835

836

if (gc2gc_buff)

837

wfree(gc2gc_buff);

838

}

839

840

/** mgc2mgc: frequency and generalized cepstral transformation.
 * Converts c1[0..m1] (alpha a1, gamma g1) into c2[0..m2] (alpha a2,
 * gamma g2). When a1 == a2 no frequency warping is needed and c1 is
 * gain-normalized in place before the conversion. */
static void mgc2mgc(double *c1, int m1, double a1, double g1, double *c2, int m2, double a2, double g2)
{
    if (a1 == a2) {
        gnorm(c1, c1, m1, g1);
        gc2gc(c1, m1, g1, c2, m2, g2);
    } else {
        const double warp = (a2 -a1) / (1 - a1 * a2);

        freqt(c1, m1, c2, m2, warp);
        gnorm(c2, c2, m2, g1);
        gc2gc(c2, m2, g1, c2, m2, g2);
    }
    /* both paths finish with the inverse gain normalization of c2 */
    ignorm(c2, c2, m2, g2);
}

858

859

/** lsp2mgc: transform LSP to MGC. lsp=C[0..m] mgc=C[0..m] */

860

static void lsp2mgc(double *lsp, double *mgc, int m, double alpha)

861

{

862

int i;

863

/* lsp2lpc */

864

lsp2lpc(lsp, mgc, m); /* lsp starts in 1! lsp[1..m] --> mgc[0..m] */

865

if(use_log_gain)

866

mgc[0] = exp(lsp[0]);

867

else

868

mgc[0] = lsp[0];

869

870

/* mgc2mgc*/

871

if(NORMFLG1)

872

ignorm(mgc, mgc, m, xgamma);

873

else if(MULGFLG1)

874

mgc[0] = (1.0 - mgc[0]) * stage;

875

876

if(MULGFLG1)

877

for(i=m; i>=1; i--)

878

mgc[i] *= -stage;

879

880

mgc2mgc(mgc, m, alpha, xgamma, mgc, m, alpha, xgamma); /* input and output is in mgc=C */

881

882

if(NORMFLG2)

883

gnorm(mgc, mgc, m, xgamma);

884

else if(MULGFLG2)

885

mgc[0] = mgc[0] * xgamma + 1.0;

886

887

if(MULGFLG2)

888

for(i=m; i>=1; i--)

889

mgc[i] *= xgamma;

890

891

}

892

893

/** mglsadff: one stage of the MGLSA filter.
 * Works on the m+1 entries of d that start at d_offset and on b[1..m].
 * Returns x minus the weighted sum of the stage's delay line; the delay
 * line is then shifted by one and its first entry refreshed from the
 * returned value. */
static double mglsadff(double x, double *b, int m, double a, double *d, int d_offset)
{
    double *w = d + d_offset;   /* this stage's delay line */
    double acc = w[0] * b[1];
    int k;

    for (k = 1; k < m; k++) {
        w[k] += a * (w[k+1] - w[k-1]);
        acc += w[k] * b[k+1];
    }
    x -= acc;

    /* shift, highest index first */
    for (k = m; k > 0; k--)
        w[k] = w[k-1];
    w[0] = a * w[0] + (1 - a * a) * x;

    return x;
}

912

913

/** mglsadf: MGLSA filter.
 * Chains n mglsadff() stages; stage i uses the slice of d that starts
 * at i*(m+1). */
static double mglsadf(double x, double *b, int m, double a, int n, double *d)
{
    int stage_no = 0;

    while (stage_no < n) {
        x = mglsadff(x, b, m, a, d, (stage_no*(m+1)));
        stage_no++;
    }

    return x;
}

921

922

/** posfilter: postfilter for mel-cepstrum. It uses alpha and beta defined in HMMData */

923

static void postfilter_mcp(double *mcp, int m, double alpha, double beta)

924

{

925

double *postfilter_buff=NULL__null; /* used in postfiltering */

926

int postfilter_size = 0; /* buffer size for postfiltering */

927

928

double e1, e2;

929

int k;

930

931

if(beta > 0.0 && m > 1){

932

if(postfilter_size < m){

933

postfilter_buff = walloc(double,m+1)((double *)safe_walloc(sizeof(double)*(m+1)));

934

postfilter_size = m;

935

}

936

mc2b(mcp, postfilter_buff, m, alpha);

937

e1 = b2en(postfilter_buff, m, alpha);

938

939

postfilter_buff[1] -= beta * alpha * mcp[2];

940

for(k = 2; k < m; k++)

941

postfilter_buff[k] *= (1.0 +beta);

942

e2 = b2en(postfilter_buff, m, alpha);

943

postfilter_buff[0] += log(e1/e2) / 2;

944

b2mc(postfilter_buff, mcp, m, alpha);

945

946

}

947

948

if (postfilter_buff)

949

wfree(postfilter_buff);

950

951

}

952

953

/** modShift: bring n into the range [0, N) by repeatedly adding or
 * subtracting N. N is expected to be positive; the loops do not
 * terminate normally otherwise. */
static int modShift(int n, int N)
{
    /* Negative input: step up until non-negative. The result is then
       already below N, so the second loop does nothing in that case. */
    for (; n < 0; n += N)
        ;
    for (; n >= N; n -= N)
        ;
    return n;
}

963

964

/** Generate one pitch period from Fourier magnitudes */

965

static double *genPulseFromFourierMag(EST_Track *mag, int n, double f0, booleanint aperiodicFlag)

966

{

967

968

int numHarm = mag->num_channels();

969

int i;

970

int currentF0 = (int)round(f0);

971

int T, T2;

972

double *pulse = NULL__null;

973

974

if(currentF0 < 512)

975

T = 512;

976

else

977

T = 1024;

978

T2 = 2*T;

979

980

/* since is FFT2 no aperiodicFlag or jitter of 25% is applied */

981

982

/* get the pulse */

983

pulse = walloc(double,T)((double *)safe_walloc(sizeof(double)*(T)));

984

EST_FVector real(T2);

985

EST_FVector imag(T2);

986

987

/* copy Fourier magnitudes (Wai C. Chu "Speech Coding algorithms foundation and evolution of standardized coders" pg. 460) */

988

real[0] = real[T] = 0.0; /* DC component set to zero */

989

for(i=1; i<=numHarm; i++){

990

real[i] = real[T-i] = real[T+i] = real[T2-i] = mag->a(n, i-1); /* Symetric extension */

991

imag[i] = imag[T-i] = imag[T+i] = imag[T2-i] = 0.0;

992

}

993

for(i=(numHarm+1); i<(T-numHarm); i++){ /* Default components set to 1.0 */

994

real[i] = real[T-i] = real[T+i] = real[T2-i] = 1.0;

995

imag[i] = imag[T-i] = imag[T+i] = imag[T2-i] = 0.0;

996

}

997

998

/* Calculate inverse Fourier transform */

999

IFFT(real, imag);

1000

1001

/* circular shift and normalise multiplying by sqrt(F0) */

1002

double sqrt_f0 = sqrt((float)currentF0);

1003

for(i=0; i<T; i++)

1004

pulse[i] = real[modShift(i-numHarm,T)] * sqrt_f0;

1005

1006

return pulse;

1007

1008

}

1009

1010

int htsMLSAVocoder(EST_Track *lf0Pst,

1011

EST_Track *mcepPst,

1012

EST_Track *strPst,

1013

EST_Track *magPst,

1014

int *voiced,

1015

HTSData *htsData,

1016

EST_Wave *wave)

1017

{

1018

1019

double inc, x;

1020

double xp=0.0,xn=0.0,fxp,fxn,mix; /* samples for pulse and for noise and the filtered ones */

1021

int i, j, k, m, s, mcepframe, lf0frame, s_double;

1022

double alpha = htsData->alpha;

1023

double beta = htsData->beta;

1024

double aa = 1-alpha*alpha;

1025

int audio_size; /* audio size in samples, calculated as num frames * frame period */

1026

double *audio_double = NULL__null;

1027

double *magPulse = NULL__null; /* pulse generated from Fourier magnitudes */

1028

int magSample, magPulseSize;

1029

booleanint aperiodicFlag = false0;

1030

1031

double *d; /* used in the lpc vocoder */

1032

1033

double f0, f0Std, f0Shift, f0MeanOri;

1034

double *mc = NULL__null; /* feature vector for a particular frame */

1035

double *hp = NULL__null; /* pulse shaping filter, initialised once it is known orderM */

1036

double *hn = NULL__null; /* noise shaping filter, initialised once it is known orderM */

1037

1038

/* Initialise vocoder and mixed excitation, once initialised it is known the order

1039

* of the filters so the shaping filters hp and hn can be initialised. */

1040

m = mcepPst->num_channels();

1041

mc = walloc(double,m)((double *)safe_walloc(sizeof(double)*(m)));

1042

1043

initVocoder(m-1, mcepPst->num_frames(), htsData);

1044

1045

d = walloc(double,m)((double *)safe_walloc(sizeof(double)*(m)));

1046

if (lpcVocoder)

1047

{

1048

/* printf("Using LPC vocoder\n"); */

1049

for(i=0; i<m; i++)

1050

d[i] = 0.0;

1051

}

1052

mixedExcitation = htsData->useMixExc;

1053

fourierMagnitudes = htsData->useFourierMag;

1054

1055

if ( mixedExcitation )

1056

{

1057

numM = htsData->NumFilters;

1058

orderM = htsData->OrderFilters;

1059

1060

xpulseSignal = walloc(double,orderM)((double *)safe_walloc(sizeof(double)*(orderM)));

1061

xnoiseSignal = walloc(double,orderM)((double *)safe_walloc(sizeof(double)*(orderM)));

1062

/* initialise xp_sig and xn_sig */

1063

for(i=0; i<orderM; i++)

1064

xpulseSignal[i] = xnoiseSignal[i] = 0;

1065

1066

h = htsData->MixFilters;

1067

hp = walloc(double,orderM)((double *)safe_walloc(sizeof(double)*(orderM)));

1068

hn = walloc(double,orderM)((double *)safe_walloc(sizeof(double)*(orderM)));

1069

1070

//Check if the number of filters is equal to the order of strpst

1071

//i.e. the number of filters is equal to the number of generated strengths per frame.

1072

#if 0

1073

if(numM != strPst->num_channels()) {

1074

printf("htsMLSAVocoder: error num mix-excitation filters = %d "

1075

" in configuration file is different from generated str order= %d\n",

1076

numM, strPst->num_channels());

1077

}

1078

printf("HMM speech generation with mixed-excitation.\n");

1079

#endif

1080

}

1081

#if 0

1082

else

1083

printf("HMM speech generation without mixed-excitation.\n");

1084

1085

if( fourierMagnitudes && htsData->PdfMagFile != NULL__null)

1086

printf("Pulse generated with Fourier Magnitudes.\n");

1087

else

1088

printf("Pulse generated as a unit pulse.\n");

1089

1090

if(beta != 0.0)

1091

printf("Postfiltering applied with beta=%f",(float)beta);

1092

else

1093

printf("No postfiltering applied.\n");

1094

#endif

1095

1096

/* Clear content of c, should be done if this function is

1097

called more than once with a new set of generated parameters. */

1098

for(i=0; i< C_length; i++)

1099

C[i] = CC[i] = CINC[i] = 0.0;

1100

for(i=0; i< D1_length; i++)

1101

D1[i]=0.0;

1102

1103

f0Std = htsData->F0Std;

1104

f0Shift = htsData->F0Mean;

1105

f0MeanOri = 0.0;

1106

1107

/* XXX */

1108

for (mcepframe=0,lf0frame=0; mcepframe<mcepPst->num_frames(); mcepframe++)

1109

{

1110

if(voiced[mcepframe])

1111

{ /* WAS WRONG */

1112

f0MeanOri = f0MeanOri + lf0Pst->a(mcepframe, 0);

1113

lf0frame++;

1114

}

1115

}

1116

f0MeanOri = f0MeanOri/lf0frame;

1117

1118

/* ____________________Synthesize speech waveforms_____________________ */

1119

/* generate Nperiod samples per mcepframe */

1120

s = 0; /* number of samples */

1121

s_double = 0;

1122

audio_size = mcepPst->num_frames() * (fprd);

1123

audio_double = walloc(double,audio_size)((double *)safe_walloc(sizeof(double)*(audio_size))); /* initialise buffer for audio */

1124

magSample = 1;

1125

magPulseSize = 0;

1126

1127

for(mcepframe=0,lf0frame=0; mcepframe<mcepPst->num_frames(); mcepframe++)

1128

{

1129

/* get current feature vector mcp */

1130

for(i=0; i<m; i++)

1131

mc[i] = mcepPst->a(mcepframe, i);

1132

1133

/* f0 modification through the MARY audio effects */

1134

if(voiced[mcepframe]){

1135

f0 = f0Std * lf0Pst->a(mcepframe, 0) + (1-f0Std) * f0MeanOri + f0Shift;

1136

lf0frame++;

1137

if(f0 < 0.0)

1138

f0 = 0.0;

1139

}

1140

else{

1141

f0 = 0.0;

1142

}

1143

1144

/* if mixed excitation get shaping filters for this frame */

1145

if (mixedExcitation)

1146

{

1147

for(j=0; j<orderM; j++)

1148

{

1149

hp[j] = hn[j] = 0.0;

1150

for(i=0; i<numM; i++)

1151

{

1152

hp[j] += strPst->a(mcepframe, i) * h[i][j];

1153

hn[j] += ( 1 - strPst->a(mcepframe, i) ) * h[i][j];

1154

}

1155

}

1156

}

1157

1158

/* f0->pitch, in original code here it is used p, so f0=p in the c code */

1159

if(f0 != 0.0)

1160

f0 = rate/f0;

1161

1162

/* p1 is initialised in -1, so this will be done just for the first frame */

1163

if( p1 < 0 ) {

1164

p1 = f0;

1165

pc = p1;

1166

/* for LSP */

1167

if(stage != 0){

1168

if( use_log_gain)

1169

C[0] = LZERO;

1170

else

1171

C[0] = ZERO;

1172

for(i=0; i<m; i++ )

1173

C[i] = i * PI3.14159265358979323846 / m;

1174

/* LSP -> MGC */

1175

lsp2mgc(C, C, (m-1), alpha);

1176

mc2b(C, C, (m-1), alpha);

1177

gnorm(C, C, (m-1), xgamma);

1178

for(i=1; i<m; i++)

1179

C[i] *= xgamma;

1180

}

1181

1182

}

1183

1184

if(stage == 0){

1185

/* postfiltering, this is done if beta>0.0 */

1186

postfilter_mcp(mc, (m-1), alpha, beta);

1187

/* mc2b: transform mel-cepstrum to MLSA digital filter coefficients */

1188

mc2b(mc, CC, (m-1), alpha);

1189

for(i=0; i<m; i++)

1190

CINC[i] = (CC[i] - C[i]) * iprd / fprd;

1191

} else {

1192

1193

lsp2mgc(mc, CC, (m-1), alpha );

1194

1195

mc2b(CC, CC, (m-1), alpha);

1196

1197

gnorm(CC, CC, (m-1), xgamma);

1198

1199

for(i=1; i<m; i++)

1200

CC[i] *= xgamma;

1201

1202

for(i=0; i<m; i++)

1203

CINC[i] = (CC[i] - C[i]) * iprd / fprd;

1204

1205

}

1206

1207

/* p=f0 in c code!!! */

1208

if( p1 != 0.0 && f0 != 0.0 ) {

1209

inc = (f0 - p1) * (double)iprd/(double)fprd;

1210

//System.out.println(" inc=(f0-p1)/80=" + inc );

1211

} else {

1212

inc = 0.0;

1213

pc = f0;

1214

p1 = 0.0;

1215

}

1216

1217

/* Here need to generate both xp:pulse and xn:noise signals seprately*/

1218

gauss = false0; /* Mixed excitation works better with nomal noise */

1219

1220

/* Generate fperiod samples per feature vector, normally 80 samples per frame */

1221

//p1=0.0;

1222

gauss=false0;

1223

for(j=fprd-1, i=(iprd+1)/2; j>=0; j--) {

1224

if(p1 == 0.0) {

1225

if(gauss)

1226

x = 0 /* rand.nextGaussian() */; /* XXX returns double, gaussian distribution mean=0.0 and var=1.0 */

1227

else

1228

x = uniformRand(); /* returns 1.0 or -1.0 uniformly distributed */

1229

1230

if(mixedExcitation) {

1231

xn = x;

1232

xp = 0.0;

1233

}

1234

} else {

1235

if( (pc += 1.0) >= p1 ){

1236

if(fourierMagnitudes){

1237

/* jitter is applied just in voiced frames when the stregth of the first band is < 0.5*/

1238

/* this will work just if Radix FFT is used */

1239

/*if(strPst.getPar(mcepframe, 0) < 0.5)

1240

aperiodicFlag = true;

1241

else

1242

aperiodicFlag = false;

1243

magPulse = genPulseFromFourierMagRadix(magPst, mcepframe, p1, aperiodicFlag);

1244

1245

1246

magPulse = genPulseFromFourierMag(magPst, mcepframe, p1, aperiodicFlag);

1247

magSample = 0;

1248

magPulseSize = -27 /* magPulse.length*/; /** XXX **/

1249

x = magPulse[magSample];

1250

magSample++;

1251

} else

1252

x = sqrt(p1);

1253

1254

pc = pc - p1;

1255

} else {

1256

1257

if(fourierMagnitudes){

1258

if(magSample >= magPulseSize ){

1259

x = 0.0;

1260

}

1261

else

1262

x = magPulse[magSample];

1263

magSample++;

1264

} else

1265

x = 0.0;

1266

}

1267

1268

if(mixedExcitation) {

1269

xp = x;

1270

if(gauss)

1271

xn = 0 /* rand.nextGaussian() */ ; /* XXX */

1272

else

1273

xn = uniformRand();

1274

}

1275

}

1276

1277

/* apply the shaping filters to the pulse and noise samples */

1278

/* i need memory of at least for M samples in both signals */

1279

if(mixedExcitation) {

1280

fxp = 0.0;

1281

fxn = 0.0;

1282

for(k=orderM-1; k>0; k--) {

1283

fxp += hp[k] * xpulseSignal[k];

1284

fxn += hn[k] * xnoiseSignal[k];

1285

xpulseSignal[k] = xpulseSignal[k-1];

1286

xnoiseSignal[k] = xnoiseSignal[k-1];

1287

}

1288

fxp += hp[0] * xp;

1289

fxn += hn[0] * xn;

1290

xpulseSignal[0] = xp;

1291

xnoiseSignal[0] = xn;

1292

1293

/* x is a pulse noise excitation and mix is mixed excitation */

1294

mix = fxp+fxn;

1295

1296

/* comment this line if no mixed excitation, just pulse and noise */

1297

x = mix; /* excitation sample */

1298

/* printf("awb_debug me %d %f\n",(int)(s_double),(float)x); */

1299

}

1300

1301

if(lpcVocoder){

1302

// LPC filter C[k=0] = gain is not used!

1303

if(!NGAIN)

1304

x *= C[0];

1305

for(k=(m-1); k>1; k--){

1306

x = x - (C[k] * d[k]);

1307

d[k] = d[k-1];

1308

}

1309

x = x - (C[1] * d[1]);

1310

d[1] = x;

1311

1312

} else if(stage == 0 ){

1313

if(x != 0.0 )

1314

x *= exp(C[0]);

1315

x = mlsadf(x, C, m, alpha, aa, D1);

1316

1317

} else {

1318

if(!NGAIN)

1319

x *= C[0];

1320

x = mglsadf(x, C, (m-1), alpha, stage, D1);

1321

}

1322

1323

audio_double[s_double] = x;

1324

s_double++;

1325

1326

if((--i) == 0 ) {

1327

p1 += inc;

1328

for(k=0; k<m; k++){

1329

C[k] += CINC[k];

1330

}

1331

i = iprd;

1332

}

1333

} /* for each sample in a period fprd */

1334

1335

p1 = f0;

1336

1337

/* move elements in c */

1338

/* HTS_movem(v->cc, v->c, m + 1); */

1339

for(i=0; i<m; i++){

1340

C[i] = CC[i];

1341

}

1342

1343

} /* for each mcep frame */

1344

1345

/* printf("Finish processing %d mcep frames.\n",mcepframe); */

1346

1347

wave->resize(audio_size,1);

1348

for (i=0; i<s_double; i++)

1349

wave->a(i) = (short)audio_double[i];

1350

1351

return 0;

1352

1353

} /* method htsMLSAVocoder() */

1354

1355