Edinburgh Speech Tools  2.1-release
EST_PST.h
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : July 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* A general class for PredictionSuffixTrees */
38 /* */
39 /*=======================================================================*/
40 
41 #ifndef __PredictionSuffixTree_H__
42 #define __PredictionSuffixTree_H__
43 
44 #include "EST_simplestats.h"
45 #include "EST_types.h"
46 #include "EST_Features.h"
47 
/*
 * EST_PredictionSuffixTree_tree_node: one node of a prediction suffix tree.
 * A node carries a level, a state number, a path string and a discrete
 * probability distribution (pd) that the cumulate() overloads add to.
 *
 * NOTE(review): this listing skips original lines 48, 55, 63 and 65.
 * Line 48 (class header) is restored from the constructor name and line 55
 * from the cross-reference entry "EST_DiscreteProbDistribution pd --
 * EST_PST.h:55". Lines 63 and 65 cannot be recovered from this text and
 * are still missing; consult the original header before compiling.
 */
48 class EST_PredictionSuffixTree_tree_node {
49 private:
50 
51 protected:
52 
53  int p_level; /* returned by get_level(), set by set_level(); constructor sets 0 */
54  int state; /* returned by get_state(), set by set_state(); constructor sets 0 */
55  EST_DiscreteProbDistribution pd; /* restored line; fed by cumulate(), exposed by prob_dist() */
56  EST_String path; /* context */
57  void delete_node(void *n) { if (n != 0) delete (EST_PredictionSuffixTree_tree_node *)n;} /* deletes n as a node pointer; null is ignored */
58 
59 public:
60 
61 // EST_StringTrie nodes;
62 // EST_TKVL <EST_String, EST_PredictionSuffixTree_tree_node *> nodes;
64  EST_PredictionSuffixTree_tree_node() {p_level=0; state=0;}
66  void clear(void);
67  const EST_String &get_path(void) const {return path;}
68  void set_path(const EST_String &s) {path=s;}
69  void set_level(int l) {p_level=l;}
70  void set_state(int s) {state=s;}
71  int get_state(void) const {return state;}
72  int get_level(void) const {return p_level;}
73  void cumulate(const EST_String &s,double count=1) {pd.cumulate(s,count);} /* forwards to pd; count defaults to 1 */
74  void cumulate(const int i,double count=1) {pd.cumulate(i,count);} /* same, keyed by integer i */
75  const EST_String &most_probable(double *p) const; /* defined in EST_PST.cc; presumably reports the probability through p -- confirm there */
76  const EST_DiscreteProbDistribution &prob_dist() const {return pd;}
77  void print_freqs(ostream &os);
78  void print_probs(ostream &os);
79 };
80 
82 
84 
85 private:
86 
87  enum EST_filetype {PredictionSuffixTree_ascii, PredictionSuffixTree_binary};
88 
89 protected:
90 
91  int p_order;
94  EST_DiscreteProbDistribution *pd; // distribution of predictees
95  const EST_String &ppredict(EST_PredictionSuffixTree_tree_node *node,
96  const EST_StrVector &words,
97  double *prob, int *state,
98  const int index=0) const;
99 
100  void p_accumulate(EST_PredictionSuffixTree_tree_node *node,
101  const EST_StrVector &words,
102  double count,
103  const int index=0);
104 
105  const EST_DiscreteProbDistribution &p_prob_dist(
107  const EST_StrVector &words,
108  const int index=0) const;
109 public:
111  EST_PredictionSuffixTree(const int order) {init(order);}
112  EST_PredictionSuffixTree(const EST_String filename);
115  void clear(void);
116  void init(const int order);
117  double samples() const { return pd->samples(); }
118  int states() const { return num_states; }
119  int order(void) const {return p_order;}
120  void accumulate(const EST_StrVector &words,const double count=1,const int index=0);
121 
122  int load(const EST_String filename);
123  int save(const EST_String filename,const EST_PredictionSuffixTree::EST_filetype type=PredictionSuffixTree_ascii);
124 
125  // build EST_PredictionSuffixTree from train data
126  void build(const EST_String filename,
127  const EST_String prev,
128  const EST_String prev_prev,
129  const EST_String last);
130 
131  void build(const EST_StrList &input); // to go
132 
133  void test(const EST_String filename); // test EST_PredictionSuffixTree against test data
134  void print_freqs(ostream &os);
135  void print_probs(ostream &os);
136 
137  const EST_String &predict(const EST_StrVector &words) const;
138  const EST_String &predict(const EST_StrVector &words,double *prob) const;
139  const EST_String &predict(const EST_StrVector &words,double *prob,int *state) const;
141  const
142  {return p_prob_dist(nodes,words);}
143  /* Reverse probability, given X what is prob of EST_PredictionSuffixTree Y */
144  double rev_prob(const EST_StrVector &words) const;
145  double rev_prob(const EST_StrVector &words,
146  const EST_DiscreteProbDistribution &pd) const;
147  /* print frequency or probability models */
148  /* build model from file */
149  /* predict and measure success */
150 
151 };
152 
153 #endif // __PredictionSuffixTree_H__
EST_DiscreteProbDistribution pd
Definition: EST_PST.h:55
EST_PredictionSuffixTree_tree_node * nodes
Definition: EST_PST.h:93
const EST_DiscreteProbDistribution & prob_dist(const EST_StrVector &words) const
Definition: EST_PST.h:140
EST_PredictionSuffixTree(const int order)
Definition: EST_PST.h:111
double samples(void) const
Total number of examples found.
bool save(Lattice &lattice, EST_String filename)
int get_level(void) const
Definition: EST_PST.h:72
const EST_String & most_probable(double *p) const
Definition: EST_PST.cc:120
bool load(Lattice &lattice, EST_String filename)
EST_DiscreteProbDistribution * pd
Definition: EST_PST.h:94
int order(void) const
Definition: EST_PST.h:119
int index(EST_TList< T > &l, T &val, bool(*eq)(const EST_UItem *, const EST_UItem *)=NULL)
Definition: EST_TList.h:286
void cumulate(const EST_String &s, double count=1)
Definition: EST_PST.h:73
int get_state(void) const
Definition: EST_PST.h:71
void cumulate(const EST_String &s, double count=1)
Add this observation, may specify number of occurrences.
#define VAL_REGISTER_CLASS_DCLS(NAME, CLASS)
Definition: EST_Val_defs.h:44
double samples() const
Definition: EST_PST.h:117
section options Options< strong > or ngram_per_line Pseudo words
const EST_DiscreteProbDistribution & prob_dist() const
Definition: EST_PST.h:76
void print_probs(ostream &os)
Definition: EST_PST.cc:94
int states() const
Definition: EST_PST.h:118
void cumulate(const int i, double count=1)
Definition: EST_PST.h:74
void set_path(const EST_String &s)
Definition: EST_PST.h:68
void print_freqs(ostream &os)
Definition: EST_PST.cc:67
EST_StrVector vocab
Definition: dp_main.cc:85
const EST_String & get_path(void) const
Definition: EST_PST.h:67