speech-tools/wfst__train_8cc_source.html

 /*************************************************************************/
 /*                                                                       */
 /*                  Language Technologies Institute                      */
 /*                     Carnegie Mellon University                        */
 /*                      Copyright (c) 1999-2003                          */
 /*                        All Rights Reserved.                           */
 /*                                                                       */
 /*  Permission is hereby granted, free of charge, to use and distribute  */
 /*  this software and its documentation without restriction, including   */
 /*  without limitation the rights to use, copy, modify, merge, publish,  */
 /*  distribute, sublicense, and/or sell copies of this work, and to      */
 /*  permit persons to whom this work is furnished to do so, subject to   */
 /*  the following conditions:                                            */
 /*   1. The code must retain the above copyright notice, this list of    */
 /*      conditions and the following disclaimer.                         */
 /*   2. Any modifications must be clearly marked as such.                */
 /*   3. Original authors' names are not deleted.                         */
 /*   4. The authors' names are not used to endorse or promote products   */
 /*      derived from this software without specific prior written        */
 /*      permission.                                                      */
 /*                                                                       */
 /*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
 /*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
 /*  THIS SOFTWARE.                                                       */
 /*                                                                       */
 /*************************************************************************/
 /*                     Author :  Alan W Black                            */
 /*                     Date   :  October 1999                            */
 /*-----------------------------------------------------------------------*/
 /*                                                                       */
 /* Training method to split states of existing WFST based on data to     */
 /* optimize entropy                                                      */
 /*                                                                       */
 /* Confusing as this has nothing to do with the modelling                */
 /* technique known as "maximum entropy"                                  */
 /*                                                                       */
 /*=======================================================================*/
 #include <iostream>
 #include <cstdlib>
 #include "EST_WFST.h"
 #include "wfst_aux.h"
 #include "EST_Token.h"
 #include "EST_simplestats.h"

 using namespace std;

 // Register the two C++ types this file wraps in LISP cells.  The code
 // below relies on the generated accessors trans(...) / pdf(...) to unwrap
 // a cell and on siod(...) to wrap a pointer.
 // NOTE(review): the _NODEL variant presumably means the wrapper does not
 // own (delete) the transition, which stays owned by the WFST, while pdf
 // objects are owned by their wrapper -- confirm in EST_Val_defs.h.
 VAL_REGISTER_TYPE_NODEL(trans,EST_WFST_Transition)
 SIOD_REGISTER_CLASS(trans,EST_WFST_Transition)
 VAL_REGISTER_CLASS(pdf,EST_DiscreteProbDistribution)
 SIOD_REGISTER_CLASS(pdf,EST_DiscreteProbDistribution)

 // Forward declarations for the helpers defined later in this file.
 // In every helper the "LISP *data" argument is the per-state table
 // produced by find_state_usage(): one list of data positions per state.
 static LISP *find_state_usage(EST_WFST &wfst, LISP data);
 static double entropy(const EST_WFST_State *s);
 static LISP *find_state_entropies(const EST_WFST &wfst, LISP *data);
 // Not static: single-transition splitter (its only call in wfst_train is
 // commented out).
 EST_WFST_Transition *find_best_trans_split(EST_WFST &wfst,
                        int split_state,
                        LISP *data);
 static LISP find_best_split(EST_WFST &wfst,
                 int split_state_name,
                 LISP *data);
 static double find_score_if_split(EST_WFST &wfst,
                   int fromstate,
                   EST_WFST_Transition *trans,
                   LISP *data);
 static LISP find_split_pdfs(EST_WFST &wfst,
                 int split_state_name,
                 LISP *data,
                 EST_DiscreteProbDistribution &pdf_all);
 static double score_pdf_combine(EST_DiscreteProbDistribution &a,
                 EST_DiscreteProbDistribution &b,
                 EST_DiscreteProbDistribution &all);
 // The single-transition overload of split_state is compiled out; only the
 // list-based overload below is live.
 #if 0
 static void split_state(EST_WFST &wfst, EST_WFST_Transition *trans);
 #endif
 static void split_state(EST_WFST &wfst, LISP trans_list, int ostate);

 LISP load_string_data(EST_WFST &wfst,EST_String &filename)
 {
     // Read training sentences from filename, one sentence per line.
     // Every token is mapped to its index in the WFST input alphabet and
     // the result is returned as a list of sentences, each sentence being
     // a list of numeric symbol ids in reading order.  Tokens that are not
     // in the alphabet are reported on stderr but still stored (as the id
     // in_symbol() returned, i.e. -1).
     EST_TokenStream stream;
     EST_String word;
     LISP sentences = NIL;
     int symbol;
     int n_lines = 0;
     int n_tokens = 0;

     if (ts_open_failed(stream.open(filename)))
     {
         EST_error("wfst_train: failed to read data from \"%s\"",
                   (const char *)filename);
     }

     while (!stream.eof())
     {
         LISP sentence = NIL;   // built back to front, reversed below

         for (;;)
         {
             word = (EST_String)stream.get();
             symbol = wfst.in_symbol(word);
             if (symbol == -1)
             {
                 cerr << "wfst_train: data contains unknown symbol \"" <<
                     word << "\"" << endl;
             }
             sentence = cons(flocons(symbol),sentence);
             n_tokens++;

             // a sentence ends at end of line or at end of file
             if (stream.eoln() || stream.eof())
                 break;
         }

         n_lines++;
         sentences = cons(reverse(sentence),sentences);
     }

     printf("wfst_train: loaded %d lines of %d tokens\n",
            n_lines,n_tokens);

     return reverse(sentences);
 }

 static LISP *find_state_usage(EST_WFST &wfst, LISP data)
 {
     // Walk every sentence in data through the WFST, counting how often
     // each transition is taken and recording, for every state, the data
     // positions at which that state was active.
     //
     //   wfst  transducer to trace; its transition weights are reset by
     //         start_cumulate(), incremented here, and finalised by
     //         stop_cumulate().
     //   data  list of sentences, each a list of numeric symbol ids.
     //
     // Returns a new[]-allocated array with one entry per state (sized by
     // wfst.num_states()).  Entry s is a list whose elements are tails of
     // the sentences: the car of each element is the symbol seen while in
     // state s and its cdr is the rest of that sentence.  The caller must
     // delete[] the array.
     LISP *state_data = new LISP[wfst.num_states()];
     static LISP ddd = NIL;
     int s,i,id;
     LISP d,w;
     EST_WFST_Transition *trans;

     if (ddd == NIL)
         gc_protect(&ddd);

     ddd = NIL;

     wfst.start_cumulate();   // zero existing weights

     for (i=0; i < wfst.num_states(); i++)
     {
         state_data[i] = NIL;
         // NOTE(review): this conses the NIL placeholder into ddd, so the
         // cells added to state_data below are not reachable through ddd.
         ddd = cons(state_data[i],ddd);
     }

     for (i=0,d=data; d; d=cdr(d),i++)
     {
         s = wfst.start_state();
         for (w=car(d); w; w=cdr(w))
         {
             state_data[s] = cons(w,state_data[s]);
             id = get_c_int(car(w));
             trans = wfst.find_transition(s,id,id);
             if (!trans)
             {
                 // The sentence cannot be followed any further: abandon it
                 // rather than matching its remaining words against the
                 // state it got stuck in (which would add spurious counts
                 // and repeat this message for every remaining word).
                 printf("sentence %d not in language, skipping\n",i);
                 break;
             }
             trans->set_weight(trans->weight()+1);
             s = trans->state();
         }
     }

     wfst.stop_cumulate();
     return state_data;
 }

 static double entropy(const EST_WFST_State *s)
 {
     double sentropy,w;
     EST_Litem *tp;
     for (sentropy=0,tp=s->transitions.head(); tp != 0; tp = tp->next())
     {
     w = s->transitions(tp)->weight();  /* the probability */
     if (w > 0)
         sentropy += w * log(w);
     }
     return -1 * sentropy;
 }

 void wfst_train(EST_WFST &wfst, LISP data)
 {
     // Iteratively refine wfst against data by state splitting.
     //
     // Each cycle traces the data through the WFST, ranks the states by
     // entropy (highest first), and for the first state that has a usable
     // split moves the chosen incoming transitions to a freshly created
     // copy of that state.  Training stops when no state can be split or
     // after max_cycles cycles.  Every checkpoint_interval cycles the
     // current WFST is saved as "chkpntNNN.wfst".
     //
     //   wfst  transducer to train (modified in place).
     //   data  list of sentences as produced by load_string_data().
     const int max_cycles = 5000;
     const int checkpoint_interval = 100;
     LISP *state_data;
     LISP *state_entropies;
     LISP best_trans_list = NIL;
     int c=0,i, max_entropy_state;

     // data is a by-value parameter, so its address is only valid during
     // this call; it is unregistered again before returning.
     gc_protect(&data);

     while (1)
     {
         // Build table of state to points in data, and cumulate transitions
         state_data = find_state_usage(wfst,data);

         /* find entropy for each state (sorted) */
         state_entropies = find_state_entropies(wfst,state_data);

         max_entropy_state = -1;
         best_trans_list = NIL;
         for (i=0; i < wfst.num_states(); i++)
         {
             // each entry is (entropy . state_name)
             max_entropy_state = get_c_int(cdr(state_entropies[i]));
             best_trans_list = find_best_split(wfst,max_entropy_state,
                                               state_data);
             if (best_trans_list != NIL)
                 break;
         }
         delete [] state_entropies;

         if (max_entropy_state == -1)
         {
             printf("No new max_entropy state\n");
             delete [] state_data;
             break;
         }
         if (best_trans_list == NIL)
         {
             printf("No best_trans in max_entropy state\n");
             delete [] state_data;
             break;
         }

         /* some sort of stop check */
         c++;
         printf("c is %d\n",c);
         if (c > max_cycles)
         {
             printf("reached cycle end %d\n",c);
             delete [] state_data;
             break;
         }

         /* split on best split */
         split_state(wfst, best_trans_list, max_entropy_state);

         if ((c % checkpoint_interval) == 0)
         {
             EST_String chkpntname = "chkpnt";
             char bbb[7];   // c <= max_cycles here, so at most 4 digits
             sprintf(bbb,"%03d",c);
             wfst.save(chkpntname+bbb+".wfst");
         }

         delete [] state_data;
         user_gc(NIL);
     }

     // Do not leave the address of a dead stack slot registered as a GC root.
     gc_unprotect(&data);
 }

 static int me_compare_function(const void *a, const void *b)
 {
     // qsort() comparator over LISP cells whose car is a number.
     // Orders by that number, largest first: returns 1 when a's value is
     // smaller than b's, 0 when they are equal, and -1 otherwise.
     const float lhs = get_c_float(car(*(LISP *)a));
     const float rhs = get_c_float(car(*(LISP *)b));

     if (lhs < rhs)
         return 1;
     if (lhs == rhs)
         return 0;
     return -1;
 }

 static LISP *find_state_entropies(const EST_WFST &wfst, LISP *data)
 {
     // Build a new[]-allocated array with one (entropy . state_name) pair
     // per state of wfst, sorted by entropy, highest first.  data is the
     // per-state usage table; it is only used for the summary line printed
     // here.  The caller must delete[] the returned array.
     double weighted_total = 0;
     int state;
     LISP *pairs = new LISP[wfst.num_states()];
     static LISP ddd = NIL;   // keeps the new pairs reachable for the GC

     if (ddd == NIL)
         gc_protect(&ddd);
     ddd = NIL;

     state = 0;
     while (state < wfst.num_states())
     {
         const double state_entropy = entropy(wfst.state(state));

         // weight each state's entropy by the number of data points in it
         weighted_total += state_entropy * siod_llength(data[state]);

         pairs[state] = cons(flocons(state_entropy),flocons(state));
         ddd = cons(pairs[state],ddd);
         state++;
     }
     printf("average entropy is %g\n",weighted_total/state);

     qsort(pairs,wfst.num_states(),sizeof(LISP),me_compare_function);

     return pairs;
 }

 static LISP find_best_split(EST_WFST &wfst,
                 int split_state_name,
                 LISP *data)
 {
     // Find the best partition of incoming transitions that
     // minimises entropy.
     //
     //   wfst              transducer being trained.
     //   split_state_name  state whose incoming transitions are partitioned.
     //   data              per-state usage table from find_state_usage().
     //
     // Each candidate from find_split_pdfs() is (score, transitions, pdf).
     // Starting from the highest scoring candidate, other candidates are
     // greedily merged into it for as long as a merge lowers the score.
     // Returns the list of wrapped transitions in the merged group, or NIL
     // when fewer than two candidates exist.
     EST_DiscreteProbDistribution pdf_all(&wfst.in_symbols());
     EST_DiscreteProbDistribution *a_pdf, *b_pdf;
     LISP splits = NIL;   // must hold a valid value before it is protected
     LISP s,dd,r;
     LISP *ssplits;
     EST_String sname;
     int b,best_b;
     EST_Litem *i;
     int num_pdfs;
     double best_score, score, sfreq;

     gc_protect(&splits);

     // Distribution of the symbols seen while in the state to be split
     for (dd = data[split_state_name]; dd; dd = cdr(dd))
         pdf_all.cumulate(get_c_int(car(car(dd))));

     splits = find_split_pdfs(wfst,split_state_name,data,pdf_all);
     num_pdfs = siod_llength(splits);
     if (num_pdfs < 2)
     {
         // splits is a local: never leave its address registered
         gc_unprotect(&splits);
         return NIL;
     }

     ssplits = new LISP[num_pdfs];
     for (b=0,s=splits; s != NIL; s=cdr(s),b++)
         ssplits[b] = car(s);

     qsort(ssplits,num_pdfs,sizeof(LISP),me_compare_function);

     // Combine trans pdfs in pdfs until more combination doesn't improve
     while (1)
     {
         best_score = get_c_float(car(ssplits[0]));
         best_b = -1;
         a_pdf = pdf(car(cdr(cdr(ssplits[0]))));
         for (b=1; b < num_pdfs; b++)
         {
             if (ssplits[b] == NIL)   // already merged into entry 0
                 continue;
             score = score_pdf_combine(*a_pdf,
                                       *pdf(car(cdr(cdr(ssplits[b])))),
                                       pdf_all);
             if (score < best_score)
             {
                 best_score = score;
                 best_b = b;
             }
         }

         if (best_b == -1)   // no merge improves the score
             break;

         // combine a and b: add b's transitions to entry 0
         setcar(cdr(ssplits[0]),
                append(car(cdr(ssplits[0])),
                       car(cdr(ssplits[best_b]))));
         setcar(ssplits[0], flocons(best_score));
         // Update 0's pdf with values from best_b's
         b_pdf = pdf(car(cdr(cdr(ssplits[best_b]))));
         for (i=b_pdf->item_start(); !b_pdf->item_end(i);
              i = b_pdf->item_next(i))
         {
             b_pdf->item_freq(i,sname,sfreq);
             a_pdf->cumulate(i,sfreq);
         }
         ssplits[best_b] = NIL;
     }

     printf("score %g ",(double)get_c_float(car(ssplits[0])));
     for (dd=car(cdr(ssplits[0])); dd; dd=cdr(dd))
         printf("%s ",(const char *)wfst.in_symbol(trans(car(dd))->in_symbol()));
     printf("\n");
     gc_unprotect(&splits);
     r = car(cdr(ssplits[0]));
     delete [] ssplits;
     return r;
 }

 static double score_pdf_combine(EST_DiscreteProbDistribution &a,
                 EST_DiscreteProbDistribution &b,
                 EST_DiscreteProbDistribution &all)
 {
     // Score the partition {a+b, all-(a+b)}: the entropy of each part
     // multiplied by its sample count, summed.  None of the arguments is
     // modified; the work is done on copies.
     EST_DiscreteProbDistribution merged(a);
     EST_DiscreteProbDistribution remainder(all);
     EST_Litem *item;
     EST_String name;
     double freq;

     // merged = a + b
     item = b.item_start();
     while (!b.item_end(item))
     {
         b.item_freq(item,name,freq);
         merged.cumulate(item,freq);
         item = b.item_next(item);
     }

     // remainder = all - merged
     item = merged.item_start();
     while (!merged.item_end(item))
     {
         merged.item_freq(item,name,freq);
         remainder.cumulate(item,-1*freq);
         item = merged.item_next(item);
     }

     return (merged.entropy() * merged.samples()) +
         (remainder.entropy() * remainder.samples());
 }

 static LISP find_split_pdfs(EST_WFST &wfst,
                 int split_state_name,
                 LISP *data,
                 EST_DiscreteProbDistribution &pdf_all)
 {
     // For every used transition (weight > 0) that enters split_state_name,
     // collect the distribution of the symbols that follow it in the data,
     // as if that transition led to a state of its own.
     //
     // Returns a list of (score, (transition), pdf) entries, one for each
     // transition whose score is positive and whose distribution has more
     // than 10 samples.  Distributions that are not returned are deleted.
     EST_DiscreteProbDistribution empty;
     LISP result = NIL;
     const int nstates = wfst.num_states();

     for (int from = 0; from < nstates; from++)
     {
         const EST_WFST_State *source = wfst.state(from);

         for (EST_Litem *tp = source->transitions.head(); tp != 0;
              tp = tp->next())
         {
             EST_WFST_Transition *tr = source->transitions(tp);

             // only used transitions that lead into the state being split
             if ((tr->state() != split_state_name) || !(tr->weight() > 0))
                 continue;

             const int label = tr->in_symbol();
             EST_DiscreteProbDistribution *following =
                 new EST_DiscreteProbDistribution(&wfst.in_symbols());

             for (LISP dd = data[from]; dd; dd = cdr(dd))
             {
                 LISP point = car(dd);
                 const int seen = get_c_int(car(point));
                 if (seen != label)
                     continue;
                 // This one would go to the new state so we count it,
                 // unless it is the last symbol of its sentence
                 LISP rest = cdr(point);
                 if (rest)
                     following->cumulate(get_c_int(car(rest)));
             }

             const double value = score_pdf_combine(*following,empty,pdf_all);
             if ((value > 0) &&                  // ignore transitions with no data
                 (following->samples() > 10))    // and those with only a few points
             {
                 // entry layout: value, list of trans, pdf
                 LISP wrapped_trans = siod(tr);
                 LISP wrapped_pdf = siod(following);
                 LISP entry = cons(flocons(value),
                                   cons(cons(wrapped_trans,NIL),
                                        cons(wrapped_pdf,NIL)));
                 result = cons(entry,result);
             }
             else
                 delete following;
         }
     }

     return result;
 }

 EST_WFST_Transition *find_best_trans_split(EST_WFST &wfst,
                        int split_state_name,
                        LISP *data)
 {
     // Find the single incoming transition of split_state_name whose
     // removal to a state of its own gives the lowest score, where the
     // score to beat is the unsplit state's entropy times its data count.
     //
     //   wfst              transducer being trained.
     //   split_state_name  state considered for splitting.
     //   data              per-state usage table from find_state_usage().
     //
     // Returns the best transition, or 0 when no split improves the score.
     EST_Litem *tp;
     EST_WFST_Transition *best_trans = 0;
     const EST_WFST_State *split_state = wfst.state(split_state_name);
     double best_score,bb;
     int i;

     best_score = entropy(split_state)*siod_llength(data[split_state_name]);

     /* For each transition going to split_state.  Every state is scanned, */
     /* including state 0, as find_split_pdfs() does; target states are    */
     /* compared by name so no state lookup is needed per transition.      */
     for (i=0; i < wfst.num_states(); i++)
     {
         const EST_WFST_State *s = wfst.state(i);
         for (tp=s->transitions.head(); tp != 0; tp = tp->next())
         {
             if ((s->transitions(tp)->state() != split_state_name) ||
                 !(s->transitions(tp)->weight() > 0))
                 continue;

             bb = find_score_if_split(wfst,i,s->transitions(tp),data);
             if (bb == -1)  /* didn't find a split */
                 continue;
             if (bb < best_score)
             {
                 best_score = bb;
                 best_trans = s->transitions(tp);
             }
         }
     }

     if (best_trans)
         cout << "best " << wfst.in_symbol(best_trans->in_symbol()) << " "
              << best_trans->weight() << " "
              << best_trans->state() << " " << best_score << endl;
     return best_trans;
 }

 static double find_score_if_split(EST_WFST &wfst,
                                   int fromstate,
                   EST_WFST_Transition *trans,
                   LISP *data)
 {
     // Score the effect of giving trans (a transition leaving fromstate) a
     // target state of its own.  The score is the entropy of the symbols
     // that follow trans times their count, plus the entropy of what would
     // remain in the current target state times its count.  Returns -1 when
     // either part would be empty.
     EST_DiscreteProbDistribution moved(&wfst.in_symbols());
     EST_DiscreteProbDistribution remaining(&wfst.in_symbols());
     EST_Litem *item;
     EST_String name;
     double freq;
     double moved_entropy;
     double remaining_entropy;
     LISP dd;

     // placeholder values, replaced below whenever a part has samples
     moved_entropy = remaining_entropy = 32*32*32*32;

     /* distribution of what follows trans, over the data through fromstate */
     const int label = trans->in_symbol();
     for (dd = data[fromstate]; dd; dd = cdr(dd))
     {
         const int seen = get_c_int(car(car(dd)));
         if (seen != label)
             continue;
         // This one would go to the new state so we count it
         if (cdr(car(dd))) // not end of data string
             moved.cumulate(get_c_int(car(cdr(car(dd)))));
     }
     if (moved.samples() > 0)
         moved_entropy = moved.entropy();

     /* distribution of the old target state minus the part split off */
     // Actually only need to do this once per state
     const int target = trans->state();
     for (dd = data[target]; dd; dd = cdr(dd))
         remaining.cumulate(get_c_int(car(car(dd))));

     item = moved.item_start();
     while (!moved.item_end(item))
     {
         moved.item_freq(item,name,freq);
         remaining.cumulate(item,-1*freq);
         item = moved.item_next(item);
     }
     if (remaining.samples() > 0)
         remaining_entropy = remaining.entropy();

     if ((remaining.samples() == 0) ||
         (moved.samples() == 0))
         return -1;

     return (remaining_entropy * remaining.samples()) +
         (moved_entropy * moved.samples());
 }

 #if 0
 /* Disabled (compiled out) single-transition variant of split_state.  */
 /* The live overload below takes a list of transitions plus the name  */
 /* of the state being split instead.                                  */
 static void split_state(EST_WFST &wfst, EST_WFST_Transition *trans)
 {
     /* Split off a new state for given trans.  Add transitions    */
     /* to this new state for all transitions in (old) state trans */
     /* goes to                                                    */
     EST_Litem *tp;
     int nstate = wfst.add_state(wfst_final);
     int ostate = trans->state();

 //    printf("state %d entropy %g\n",ostate,entropy(wfst.state(ostate)));
     /* must be done before adding the new transitions to nstate */
     trans->set_state(nstate);

     /* give the new state a zero-weight copy of each of ostate's transitions */
     for (tp=wfst.state(ostate)->transitions.head(); tp != 0; tp = tp->next())
     {
     wfst.state_non_const(nstate)->
         add_transition(0.0,  /* weight will be filled in later*/
                wfst.state(ostate)->transitions(tp)->state(),
                wfst.state(ostate)->transitions(tp)->in_symbol(),
                wfst.state(ostate)->transitions(tp)->out_symbol());

     }
 //    printf(" nstate %d entropy %g\n",nstate,entropy(wfst.state(nstate)));
 //    printf(" ostate %d entropy %g\n",ostate,entropy(wfst.state(ostate)));

 }
 #endif

 static void split_state(EST_WFST &wfst, LISP trans_list, int ostate)
 {
     /* Split state ostate: create a new state, redirect every transition */
     /* in trans_list (a list of wrapped transitions) to it, and give the */
     /* new state a zero-weight copy of each transition leaving ostate.   */
     const int nstate = wfst.add_state(wfst_final);

     /* must be done before adding the new transitions to nstate */
     for (LISP cell = trans_list; cell; cell = cdr(cell))
         trans(car(cell))->set_state(nstate);

     const EST_WFST_State *old_state = wfst.state(ostate);
     EST_WFST_State *new_state = wfst.state_non_const(nstate);

     for (EST_Litem *tp = old_state->transitions.head(); tp != 0;
          tp = tp->next())
     {
         EST_WFST_Transition *source = old_state->transitions(tp);
         new_state->add_transition(0.0,  /* weight will be filled in later*/
                                   source->state(),
                                   source->in_symbol(),
                                   source->out_symbol());
     }
 }

siod
LISP siod(const class EST_Val v)
Definition: siod_est.cc:184

EST_WFST_Transition::set_state
void set_state(int s)
Definition: EST_WFST.h:80

EST_TokenStream::get
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:499

find_best_trans_split
EST_WFST_Transition * find_best_trans_split(EST_WFST &wfst, int split_state, LISP *data)
Definition: wfst_train.cc:476

qsort
void qsort(EST_TList< T > &a)

EST_DiscreteProbDistribution::item_next
EST_Litem * item_next(EST_Litem *idx) const
Used for iterating through members of the distribution.
Definition: EST_DProbDist.cc:388

EST_TokenStream
Definition: EST_Token.h:239

SIOD_REGISTER_CLASS
#define SIOD_REGISTER_CLASS(NAME, CLASS)
Definition: siod_defs.h:207

EST_DiscreteProbDistribution
Definition: EST_simplestats.h:210

EST_WFST::start_state
int start_state() const
Definition: EST_WFST.h:206

get_c_float
float get_c_float(LISP x)
Definition: slib.cc:1858

EST_WFST::add_state
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
Definition: EST_WFST.cc:669

EST_WFST_Transition
an internal class for EST_WFST for representing transitions in a WFST
Definition: EST_WFST.h:61

EST_UItem
Definition: EST_UList.h:49

EST_WFST
a class representing a weighted finite-state transducer
Definition: EST_WFST.h:154

EST_DiscreteProbDistribution::samples
double samples(void) const
Total number of example found.
Definition: EST_simplestats.h:250

wfst_train
void wfst_train(EST_WFST &wfst, LISP data)
Definition: wfst_train.cc:188

VAL_REGISTER_TYPE_NODEL
#define VAL_REGISTER_TYPE_NODEL(NAME, CLASS)
Definition: EST_Val_defs.h:110

get_c_int
long int get_c_int(LISP x)
Definition: slib.cc:1850

NIL
#define NIL
Definition: siod_defs.h:92

siod_llength
int siod_llength(LISP list)
Definition: siod.cc:202

std

append
LISP append(LISP l1, LISP l2)
Definition: slib_list.cc:177

gc_unprotect
void gc_unprotect(LISP *location)
Definition: slib.cc:759

EST_WFST_Transition::state
int state() const
Definition: EST_WFST.h:76

load_string_data
LISP load_string_data(EST_WFST &wfst, EST_String &filename)
Definition: wfst_train.cc:83

VAL_REGISTER_CLASS
#define VAL_REGISTER_CLASS(NAME, CLASS)
Definition: EST_Val_defs.h:62

EST_DiscreteProbDistribution::item_start
EST_Litem * item_start() const
Used for iterating through members of the distribution.
Definition: EST_DProbDist.cc:372

EST_DiscreteProbDistribution::item_end
int item_end(EST_Litem *idx) const
Used for iterating through members of the distribution.
Definition: EST_DProbDist.cc:380

EST_WFST_Transition::weight
float weight() const
Definition: EST_WFST.h:75

user_gc
LISP user_gc(LISP args)
Definition: slib.cc:1269

EST_WFST::in_symbol
int in_symbol(const EST_String &s) const
Map input symbol to input alphabet index.
Definition: EST_WFST.h:208

EST_WFST::start_cumulate
void start_cumulate()
Clear and start cumulation.
Definition: EST_WFST.cc:687

EST_UItem::next
EST_UItem * next()
Definition: EST_UList.h:55

EST_TokenStream::open
int open(const EST_String &filename)
open a EST_TokenStream for a file.
Definition: EST_Token.cc:213

EST_WFST::stop_cumulate
void stop_cumulate()
Stop cumulation and calculate probabilities on transitions.
Definition: EST_WFST.cc:702

wfst_aux.h

EST_TokenStream::eof
int eof()
end of file
Definition: EST_Token.h:362

EST_DiscreteProbDistribution::cumulate
void cumulate(const EST_String &s, double count=1)
Add this observation, may specify number of occurrences.
Definition: EST_DProbDist.cc:159

EST_WFST::in_symbols
const EST_Discrete & in_symbols() const
Accessing the input alphabet.
Definition: EST_WFST.h:233

EST_DiscreteProbDistribution::item_freq
void item_freq(EST_Litem *idx, EST_String &s, double &freq) const
During iteration returns name and frequency given index.
Definition: EST_DProbDist.cc:404

EST_WFST_State
an internal class for EST_WFST used to represent a state in a WFST
Definition: EST_WFST.h:98

EST_DiscreteProbDistribution::entropy
double entropy(void) const
Definition: EST_DProbDist.cc:341

cons
LISP cons(LISP x, LISP y)
Definition: slib_list.cc:97

EST_WFST::state
const EST_WFST_State * state(int i) const
Return internal state information.
Definition: EST_WFST.h:226

EST_WFST.h

EST_WFST::save
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
?
Definition: EST_WFST.cc:353

EST_WFST_Transition::in_symbol
int in_symbol() const
Definition: EST_WFST.h:77

EST_error
#define EST_error
Definition: EST_error.h:104

EST_simplestats.h

setcar
LISP setcar(LISP cell, LISP value)
Definition: slib_list.cc:54

EST_WFST_Transition::set_weight
void set_weight(float f)
Definition: EST_WFST.h:79

EST_WFST::num_states
int num_states() const
Definition: EST_WFST.h:205

EST_WFST::find_transition
EST_WFST_Transition * find_transition(int state, int in, int out) const
Find (first) transition given in and out symbols.
Definition: EST_WFST.cc:271

EST_WFST::state_non_const
EST_WFST_State * state_non_const(int i)
Return internal state information (non-const)
Definition: EST_WFST.h:228

flocons
LISP flocons(double x)
Definition: slib.cc:673

EST_UList::head
EST_UItem * head() const
Definition: EST_UList.h:97

gc_protect
void gc_protect(LISP *location)
Definition: slib.cc:791

car
LISP car(LISP x)
Definition: slib_list.cc:115

empty
EST_StrList empty
Definition: sigpr_example.cc:55

EST_String
EST_String
Definition: EST_features_aux.cc:50

reverse
void reverse(EST_Wave &sig)
Definition: EST_wave_aux.cc:198

wfst_final
Definition: EST_WFST.h:85

EST_Token.h

EST_WFST_State::transitions
wfst_translist transitions
Definition: EST_WFST.h:104

cdr
LISP cdr(LISP x)
Definition: slib_list.cc:124

EST_TokenStream::eoln
int eoln()
end of line
Definition: EST_Token.cc:832

EST_String
Definition: EST_String.h:76