54 static double fs_find_backoff_prob(
EST_Ngrammar *backoff_ngrams,
71 delete [] backoff_ngrams;
82 for (i=0; i < ngram.
order()-1; i++)
98 for (j=0; j < ngram.
order()-1; j++)
102 for (l=0; l < j; l++)
123 cerr <<
"Ngrammar: can only ptsmooth dense ngrammars" << endl;
144 words[words.
n()-1] = name;
146 fs_find_backoff_prob(backoff_ngrams,
159 static double fs_find_backoff_prob(
EST_Ngrammar *backoff_ngrams,
172 for(i=0; i<order; i++)
173 nnn[order-1-i] =
words(words.
n()-1-i);
175 if (backoff_ngrams[order-1].frequency(nnn) < smooth_thresh)
176 return fs_find_backoff_prob(backoff_ngrams,
177 order-1,words,smooth_thresh);
int fs_backoff_smooth(EST_Ngrammar *backoff_ngrams, EST_Ngrammar &ngram, int smooth_thresh)
EST_Litem * item_next(EST_Litem *idx) const
Used for iterating through members of the distribution.
double samples(void) const
Total number of examples found.
void accumulate(const EST_StrVector &words, const double count=1)
EST_Litem * item_start() const
Used for iterating through members of the distribution.
EST_Discrete * pred_vocab
int item_end(EST_Litem *idx) const
Used for iterating through members of the distribution.
EST_NgrammarState * p_states
int num_states(void) const
EST_DiscreteProbDistribution & pdf()
double probability(const EST_StrVector &words, bool force=false, const bool trace=false) const
void resize(ssize_t n, int set=1)
void item_freq(EST_Litem *idx, EST_String &s, double &freq) const
During iteration, returns the name and frequency for the given index.
representation_t representation() const
section options Options< strong > or ngram_per_line Pseudo words
bool Good_Turing_smooth(EST_Ngrammar &ngrammar, int maxcount, int mincount)
void set_frequency(const EST_String &s, double c)
const EST_StrVector & make_ngram_from_index(const int i) const
void fs_build_backoff_ngrams(EST_Ngrammar *backoff_ngrams, EST_Ngrammar &ngram)
INLINE ssize_t n() const
Number of items in the vector.
void Ngram_freqsmooth(EST_Ngrammar &ngram, int smooth_thresh1, int smooth_thresh2)