56 method = op.
S(
"pda_method");
60 else if (method ==
"srpd")
63 EST_error(
"Unknown pda %s\n", (
const char *)method);
73 method = op.
S(
"pda_method");
76 srpd(sig, raw_fz, op);
77 else if (method ==
"srpd")
78 srpd(sig, raw_fz, op);
80 EST_error(
"Unknown pda %s\n", (
const char *)method);
89 default_srpd_op(&srpd_op);
90 parse_srpd_list(op, &srpd_op);
92 if (op.
I(
"do_low_pass",0))
95 srpd(sig, fz, srpd_op, op.
I(
"srpd_resize", 0));
108 ssize_t i, rns, tracklen, j = 0;
111 STATUS_ pda_status, held_status;
134 EST_error(
"Pitch tracking algorithm must have equal spaced track\n");
140 for (i = 0; i < cc.
size; cc.
coeff[i++] = 0.0);
204 srpd->
L = al.
I(
"decimation");
209 if (al.
present(
"pda_frame_shift"))
210 srpd->
shift = al.
F(
"pda_frame_shift") * 1000.0;
211 if (al.
present(
"pda_frame_length"))
212 srpd->
length = al.
F(
"pda_frame_length") * 1000.0;
214 srpd->
Tsilent = al.
I(
"noise_floor");
215 if (al.
present(
"v2uv_coeff_thresh"))
216 srpd->
Thigh = al.
F(
"v2uv_coef_thresh");
217 if (al.
present(
"min_v2uv_coef_thresh"))
218 srpd->
Tmin = al.
F(
"min_v2uv_coef_thresh");
219 if (al.
present(
"v2uv_coef_thresh_ratio"))
221 if (al.
present(
"anti_doubling_thresh"))
222 srpd->
Tdh = al.
F(
"anti_doubling_thresh");
223 if (al.
present(
"peak_tracking"))
225 if (al.
present(
"sample_frequency"))
231 al.
set(
"min_pitch",
"40.0");
232 al.
set(
"max_pitch",
"400.0");
233 al.
set(
"pda_frame_shift",
"0.005");
235 al.
set(
"lpf_cutoff",
"600");
236 al.
set(
"lpf_order",
"49");
237 al.
set(
"f0_file_type",
"esps");
244 al.
set(
"peak_tracking", 0);
252 "-L Perform low pass filtering on input. This option should always \n" 253 " be used in normal processing as it usually increases \n" 254 " performance considerably\n\n" 255 "-P perform peak tracking\n\n" 256 "-fmin <float> miniumum F0 value. Sets the minimum allowed F0 in \n" 258 " Changing this to suit the speaker usually increases \n" 259 " performance. Typical recommended values are 60-90Hz for\n" 260 " males and 120-150Hz for females\n\n" 261 "-fmax <float> maxiumum F0 value. Sets the maximum allowed F0 in \n" 263 " Changing this to suit the speaker usually increases \n" 264 " performance. Typical recommended values are 200Hz for \n" 265 " males and 300-400Hz for females\n\n" 266 "-shift <float> frame spacing in seconds for fixed frame analysis. \n" 267 " This doesn't have to be the same as the output file spacing - \n" 268 " the -S option can be used to resample the track before saving \n" 270 "-length <float> analysis frame length in seconds.\n" 272 "-lpfilter <int> Low pass filter, with cutoff frequency in Hz \n" 273 " Filtering is performed by a FIR filter which is built at run \n" 274 " time. The order of the filter can be given by -forder. The \n" 275 " default value is 199\n\n" 276 "-forder <int> Order of FIR filter used for lpfilter and \n" 277 " hpfilter. This must be ODD. Sensible values range \n" 278 " from 19 (quick but with a shallow rolloff) to 199 \n" 279 " (slow but with a steep rolloff). The default is 199.\n\n";
287 "-d <float> decimation factor\n" 288 " set down-sampling for quicker computation so that only one in \n" 289 " <parameter>decimation factor</parameter> samples are used in the first instance. \n" 290 " Must be in the range of one to ten inclusive. Default is four. \n" 291 " For data sampled at 10kHz, it is advised that a decimation \n" 292 " factor of two isselected.\n\n" 294 "-n <float> Inoise floor.\n" 295 " Set the maximum absolute signal amplitude that represents \n" 296 " silence to <parameter>Inoise floor</parameter>. If the absolute amplitude of \n" 297 " the first segment in a given frame is below this level at all \n" 298 " times, then the frame is classified as representing silence. \n" 299 " Must be a positive number. Default is 120 ADC units.\n\n" 301 "-H <float> unvoiced to voiced coeff threshold\n" 302 " set the correlation coefficient threshold which must be \n" 303 " exceeded in a transition from an unvoiced classified frame \n" 304 " of speech to a voiced frame as the unvoiced to voiced coeff \n" 305 " threshold. Must be in the range zero to one inclusive. \n" 306 " Default is 0.88.\n\n" 308 "-m <float> min voiced to unvoiced coeff threshold \n" 309 " set the minimum allowed correlation coefficient threshold \n" 310 " which must not be exceeded in a transition from a voiced \n" 311 " classified frame of speech to an unvoiced frame, as \n" 312 " <parameter>min voiced to unvoiced coeff threshold</parameter>. Must be in the \n" 313 " range zero to <parameter>unvoiced to voiced coeff threshold</parameter> \n" 314 " inclusive. Default is 0.75.\n\n" 316 "-R <float> voiced to unvoiced coeff threshold-ratio \n" 317 " set the scaling factor used in determining the correlation\n" 318 " coefficient threshold which must not be exceeded in a voiced \n" 319 " frame to unvoiced frame transition, as <parameter>voiced to unvoiced</parameter> \n" 320 " coeff threshold -ratio. 
The voiced to unvoiced coefficient \n" 321 " threshold is determined by multiplying this scaling factor \n" 322 " with the maximum cross-correlation coefficient of the \n" 323 " previously voiced frame. If this product is less than \n" 324 " <parameter>min voiced to unvoiced coeff threshold</parameter> then this is used \n" 325 " instead. Must be in the range zero to one inclusive. \n" 326 " Default is 0.85.\n\n" 328 "-t <float> anti pitch doubling/halving threshold\n" 329 " set the threshold used in eliminating (as far as possible) \n" 330 " pitch doubling and pitch halving errors as <parameter>anti pitch \n" 331 " double/halving threshold</parameter>. Must be in the range zero to \n" 332 " one inclusive. Default is 0.77.\n\n";
A class for storing digital waveforms. The waveform is stored as an array of 16 bit shorts...
void icda(EST_Wave &sig, EST_Track &fz, EST_Track &speech, EST_Features &op, EST_String method)
void end_structure_use(SEGMENT_ *p_seg, CROSS_CORR_ *p_cc)
void set_channel_name(const EST_String &name, int channel)
set the name of the channel.
void set_break(ssize_t i)
set frame i to be a break
void FIRlowpass_filter(EST_Wave &sigin, int freq, int order=DEFAULT_FILTER_ORDER)
void initialise_status(struct Srpd_Op *p, STATUS_ *p_status)
ssize_t num_samples() const
return the number of samples in the waveform
void set(const EST_String &name, int ival)
void resize(ssize_t num_frames, int num_channels, bool preserve=1)
EST_String options_pda_general(void)
EST_String options_pda_srpd(void)
EST_String ftoString(float n, int pres=3, int width=0, int l=0)
Make an EST_String object from a float, with variable precision.
void super_resolution_pda(struct Srpd_Op *paras, SEGMENT_ seg, CROSS_CORR_ *p_cc, STATUS_ *p_status)
const EST_String S(const EST_String &path) const
#define DEFAULT_MAX_PITCH
void srpd(EST_Wave &sig, EST_Track &fz, EST_Features &op)
void default_pda_options(EST_Features &al)
bool equal_space() const
return true if track has equal (i.e. fixed) frame spacing
float & a(ssize_t i, int c=0)
void smooth_phrase(EST_Track &c, EST_Track &speech, EST_Features &options, EST_Track &sm)
#define DEFAULT_MIN_PITCH
int present(const EST_String &name) const
#define DEFAULT_TMAX_RATIO
void pda(EST_Wave &sig, EST_Track &fz, EST_Features &op, EST_String method)
int sample_rate() const
return the sampling rate (frequency)
#define DEFAULT_DECIMATION
int read_next_wave_segment(EST_Wave &sig, struct Srpd_Op *paras, SEGMENT_ *p_seg)
int I(const EST_String &path) const
void initialise_structures(struct Srpd_Op *p, SEGMENT_ *p_seg, CROSS_CORR_ *p_cc)
void set_equal_space(bool t)
void fill_time(float t, int start=1)
float F(const EST_String &path) const
Utility EST_String Functions header file.