77 for (i = pd.item_start();
81 pd.item_freq(i,s,freq);
82 os << get_path() <<
" " << s <<
" : " << freq << endl;
88 for (t.
begin(nodes); t; t++)
89 pstnode(t->v)->print_freqs(os);
103 os << get_path() <<
" :";
104 for (
EST_Litem *i = pd.item_start(); !pd.item_end(i) ; i=pd.item_next(i))
106 pd.item_prob(i,s,prob);
107 os <<
" " << s <<
" " << prob;
114 for (t.
begin(nodes); t; t++)
115 pstnode(t->v)->print_probs(os);
123 return pd.most_probable(prob);
161 const int index)
const 165 if (words.
n() == index+1){
178 return p_prob_dist(next,words,index+1);
196 if (words.
n()+index < p_order)
197 cerr <<
"EST_PredictionSuffixTree: accumlating window is wtoo small" 201 pd->cumulate(
words(p_order-1+index),count);
202 p_accumulate(nodes,words,count,index);
213 if (words.
n() == index+1)
236 p_accumulate(next,words,count,index+1);
246 double d2 = pd->frequency(
words(order()-1));
257 pd->frequency(
words(order()-1));
265 return ppredict(nodes,words,&p,&state);
272 return ppredict(nodes,words,p,&state);
278 return ppredict(nodes,words,p,state);
284 double *p,
int *state,
285 const int index)
const 288 if (words.
n() == index+1)
305 return ppredict(next,words,p,state,index+1);
314 os <<
"EST_PredictionSuffixTree order=" << p_order << endl;
315 nodes->print_freqs(os);
324 os <<
"EST_PredictionSuffixTree " << p_order << endl;
325 nodes->print_probs(os);
339 ofstream os(filename);
353 if (ts.
open(filename) != 0)
355 cerr <<
"EST_PredictionSuffixTree: failed to open \"" << filename <<
"\" for reading\n";
360 if (ts.
get() !=
"EST_PredictionSuffixTree")
362 cerr <<
"EST_PredictionSuffixTree: file \"" << filename <<
"\" not an EST_PredictionSuffixTree\n";
366 order = atoi(ts.
get().string());
367 if ((order < 1) || (order > 10))
369 cerr <<
"EST_PredictionSuffixTree: file \"" << filename <<
"\" has bad order\n";
375 for (i=0; i<p_order; i++)
381 window[p_order-1] = ts.
get().string();
384 cerr <<
"EST_PredictionSuffixTree: file \"" << filename <<
"\" missed parsed line ";
385 cerr << ts.
linenum() <<
" near EST_PredictionSuffixTree\n";
386 for (i=0; i < order; i++)
387 cout <<
" " << window(i);
391 freq = atoi(ts.
get().string());
392 accumulate(window,freq);
409 else if (ts.
open(filename) == -1)
414 for (i=0; i<p_order-1; i++)
415 window[i] = prev_prev;
416 window[p_order-1] = prev;
418 accumulate(window,1);
425 window[p_order-1] = ts.
get().string();
426 accumulate(window,1);
432 window[p_order-1] = last;
433 accumulate(window,1);
450 for (i=0; i<p_order; i++)
457 window[p_order-1] = input(i_ptr);
458 accumulate(window,1);
476 else if (ts.
open(filename) == -1)
482 for (p.
begin(nodes->nodes); p; p++)
488 for (i=0; i<p_order; i++)
491 int num_tsamples = 0;
496 window[p_order-1] = ts.
get().string();
501 pairs.
add_item(window(p_order-1),predict(window),1);
506 cout <<
"Mean entropy (?) is " << e/num_tsamples << endl;
const EST_DiscreteProbDistribution & p_prob_dist(EST_PredictionSuffixTree_tree_node *node, const EST_StrVector &words, const int index=0) const
EST_TokenStream & get(EST_Token &t)
get next token in stream
void set_val(const EST_String &name, const EST_Val &sval)
double samples(void) const
Total number of examples found.
EST_Val est_val(const EST_Item_featfunc f)
void p_accumulate(EST_PredictionSuffixTree_tree_node *node, const EST_StrVector &words, double count, const int index=0)
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
int get_level(void) const
const EST_DiscreteProbDistribution PSTnullProbDistribution
const EST_String & most_probable(double *p) const
#define VAL_REGISTER_CLASS(NAME, CLASS)
int index(EST_TList< T > &l, T &val, bool(*eq)(const EST_UItem *, const EST_UItem *)=NULL)
void cumulate(const EST_String &s, double count=1)
int get_state(void) const
void build(const EST_String filename, const EST_String prev, const EST_String prev_prev, const EST_String last)
int open(const EST_String &filename)
Open an EST_TokenStream for a file.
void print_confusion(const EST_FMatrix &a, EST_StrStr_KVL &list, EST_StrList &lex)
double frequency(const EST_String &s) const
const EST_Val & f(const EST_String &path) const
void accumulate(const EST_StrVector &words, const double count=1, const int index=0)
void slide(EST_IVector &i, const int l)
double entropy(void) const
EST_FMatrix confusion(EST_StrStr_KVL &list, EST_StrList &lex)
int linenum(void) const
returns line number of EST_TokenStream
double rev_prob(const EST_StrVector &words) const
const EST_String PredictionSuffixTree_oov("_OOV_")
const EST_String & predict(const EST_StrVector &words) const
section options Options< strong > or ngram_per_line Pseudo words
const EST_DiscreteProbDistribution & prob_dist() const
const EST_String & ppredict(EST_PredictionSuffixTree_tree_node *node, const EST_StrVector &words, double *prob, int *state, const int index=0) const
int save(const EST_String filename, const EST_PredictionSuffixTree::EST_filetype type=PredictionSuffixTree_ascii)
void append(const T &item)
add item onto end of list
void print_probs(ostream &os)
int load(const EST_String filename)
EST_PredictionSuffixTree()
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
void init(const int order)
~EST_PredictionSuffixTree()
void print_freqs(ostream &os)
void begin(const Container &over)
Set the iterator ready to run over this container.
void test(const EST_String filename)
INLINE ssize_t n() const
number of items in vector.
void set_path(const EST_String &s)
void print_probs(ostream &os)
void print_freqs(ostream &os)
const EST_String & get_path(void) const