Edinburgh Speech Tools  2.1-release
EST_Wagon.h
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : May 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* Public declarations for Wagon (CART builder) */
38 /* */
39 /*=======================================================================*/
40 #ifndef __WAGON_H__
41 #define __WAGON_H__
42 
43 #include "EST_String.h"
44 #include "EST_Val.h"
45 #include "EST_TVector.h"
46 #include "EST_TList.h"
47 #include "EST_simplestats.h" /* For EST_SuffStats class */
48 #include "EST_Track.h"
49 #include "siod.h"
/* Fatal error reporter: streams WMESS to std::cerr, ends the line with   */
/* std::endl and then calls exit(-1).  The expansion is one parenthesised */
/* comma expression, so it can be used wherever an expression is allowed. */
#define wagon_error(WMESS) ((std::cerr << WMESS << std::endl), exit(-1))

// Comparisons against HUGE_VAL or FLT_MAX raised floating point
// exceptions on Alphas, so Wagon defines its own "huge" value instead
#define WGN_HUGE_VAL 1.0e20
55 
56 class WVector : public EST_FVector
57 {
58  public:
59  WVector(int n) : EST_FVector(n) {}
60  int get_int_val(int n) const { return (int)a_no_check(n); }
61  float get_flt_val(int n) const { return a_no_check(n); }
62  void set_int_val(int n,int i) { a_check(n) = (int)i; }
63  void set_flt_val(int n,float f) { a_check(n) = f; }
64 };
65 
68 
69 /* Different types of feature */
70 enum wn_dtype {/* for predictees and predictors */
72  /* for predictees only */
75  /* for ignored features */
77 
78 class WDataSet : public WVectorList {
79  private:
80  int dlength;
81  EST_IVector p_type;
82  EST_IVector p_ignore;
83  EST_StrVector p_name;
84  public:
85  WDataSet(): WVectorList() { dlength = 0;};
86  void load_description(const EST_String& descfname,LISP ignores);
87  void ignore_non_numbers();
88 
89  int ftype(const int &i) const {return p_type(i);}
90  int ignore(int i) const {return p_ignore(i); }
91  void set_ignore(int i,int value) { p_ignore[i] = value; }
92  const EST_String &feat_name(const int &i) const {return p_name(i);}
93  int samples(void) const {return length();}
94  int width(void) const {return dlength;}
95 };
98 
99 class WQuestion {
100  private:
101  int feature_pos;
102  wn_oper op;
103  int yes;
104  int no;
105  EST_Val operand1;
106  EST_IList operandl;
107  float score;
108  public:
110  this->yes = 0;
111  this->no = 0;
112  this->score = WGN_HUGE_VAL;
113  this->op = wnop_equal;
114  this->feature_pos = 0;}
116  { feature_pos=s.feature_pos;
117  op=s.op; yes=s.yes; no=s.no; operand1=s.operand1;
118  operandl = s.operandl; score=s.score;}
121  { feature_pos=fp; op=o; operand1=a; }
122  void set_fp(const int &fp) {feature_pos=fp;}
123  void set_oper(const wn_oper &o) {op=o;}
124  void set_operand1(const EST_Val &a) {operand1 = a;}
125  void set_yes(const int &y) {yes=y;}
126  void set_no(const int &n) {no=n;}
127  int get_yes(void) const {return yes;}
128  int get_no(void) const {return no;}
129  int get_fp(void) const {return feature_pos;}
130  wn_oper get_op(void) const {return op;}
131  const EST_Val get_operand1(void) const {return operand1;}
132  const EST_IList &get_operandl(void) const {return operandl;}
133  float get_score(void) const {return score;}
134  void set_score(const float &f) {score=f;}
135  int ask(const WVector &w) const;
136  friend ostream& operator<<(ostream& s, const WQuestion &q);
137 };
138 
142 
143 // Impurity measure for cumulating impurities from set of data
144 class WImpurity {
145  private:
146  wnim_type t;
149 
150  float cluster_impurity();
151  float cluster_member_mean(int i);
152  float vector_impurity();
153  float trajectory_impurity();
154  float ols_impurity();
155  public:
156  EST_IList members; // Maybe there should be a cluster class
157  EST_FList member_counts; // AUP: Implement counts for vectors
159  const WVectorVector *data; // Needed for ols
160  float score;
161  int l,width;
162 
163  WImpurity() { t=wnim_unset; a.reset(); trajectory=0; l=0; width=0; data=0; score=WGN_HUGE_VAL;}
164  ~WImpurity();
165  WImpurity(const WVectorVector &ds);
166  void copy(const WImpurity &s)
167  {
168  int i,j;
169  t=s.t; a=s.a; p=s.p; members=s.members; member_counts = s.member_counts; l=s.l; width=s.width;
170  score = s.score;
171  data = s.data;
172  if (s.trajectory)
173  {
174  trajectory = new EST_SuffStats *[l];
175  for (i=0; i<l; i++)
176  {
177  trajectory[i] = new EST_SuffStats[width];
178  for (j=0; j<width; j++)
179  trajectory[i][j] = s.trajectory[i][j];
180  }
181  }
182  }
183  WImpurity &operator = (const WImpurity &a) { copy(a); return *this; }
184 
185  float measure(void);
186  double samples(void);
187  wnim_type type(void) const { return t;}
188  void cumulate(const float pv,double count=1.0);
189  EST_Val value(void);
191  float cluster_distance(int i); // distance i from centre in sds
192  int in_cluster(int i); // distance i from centre < most remote member
193  float cluster_ranking(int i); // position in closeness to centre
194  friend ostream& operator<<(ostream &s, WImpurity &imp);
195 };
196 
197 class WDlist {
198  private:
199  float p_score;
200  WQuestion p_question;
201  EST_String p_token;
202  int p_freq;
203  int p_samples;
204  WDlist *next;
205  public:
206  WDlist() {
207  next=0;
208  p_score = WGN_HUGE_VAL;
209  p_freq = 0;
210  p_samples = 0;
211  }
212  ~WDlist() { if (next != 0) delete next; }
213  void set_score(float s) { p_score = s; }
214  void set_question(const WQuestion &q) { p_question = q; }
215  void set_best(const EST_String &t,int freq, int samples)
216  { p_token = t; p_freq = freq; p_samples = samples;}
217  float score() const {return p_score;}
218  const EST_String &token(void) const {return p_token;}
219  const WQuestion &question() const {return p_question;}
220  EST_Val predict(const WVector &w);
221  friend WDlist *add_to_dlist(WDlist *l,WDlist *a);
222  friend ostream &operator<<(ostream &s, WDlist &d);
223 };
224 
225 class WNode {
226  private:
227  WVectorVector data;
228  WQuestion question;
229  WImpurity impurity;
230  WNode *left;
231  WNode *right;
232  void print_out(ostream &s, int margin);
233  int leaf(void) const { return ((left == 0) || (right == 0)); }
234  int pure(void);
235  public:
236  WNode() { left = right = 0; }
237  ~WNode() { if (left != 0) {delete left; left=0;}
238  if (right != 0) {delete right; right=0;} }
239  WVectorVector &get_data(void) { return data; }
240  void set_subnodes(WNode *l,WNode *r) { left=l; right=r; }
241  void set_impurity(const WImpurity &imp) {impurity=imp;}
242  void set_question(const WQuestion &q) {question=q;}
243  void prune(void);
244  void held_out_prune(void);
245  WImpurity &get_impurity(void) {return impurity;}
246  WQuestion &get_question(void) {return question;}
247  EST_Val predict(const WVector &w);
248  WNode *predict_node(const WVector &d);
249  int samples(void) const { return data.n(); }
250  friend ostream& operator<<(ostream &s, WNode &n);
251 };
252 
253 extern Discretes wgn_discretes;
254 extern WDataSet wgn_dataset;
258 extern EST_Track wgn_UnitTrack;
260 
261 void wgn_load_datadescription(EST_String fname,LISP ignores);
262 void wgn_load_dataset(WDataSet &ds,EST_String fname);
263 WNode *wgn_build_tree(float &score);
264 WNode *wgn_build_dlist(float &score,ostream *output);
265 WNode *wagon_stepwise(float limit);
269 float summary_results(WNode &tree,ostream *output);
270 
271 extern int wgn_min_cluster_size;
272 extern int wgn_held_out;
273 extern int wgn_prune;
274 extern int wgn_quiet;
275 extern int wgn_verbose;
276 extern int wgn_predictee;
277 extern int wgn_count_field;
280 extern float wgn_float_range_split;
281 extern float wgn_balance;
284 
/* Field accessors for a question held as a LISP list X:                  */
/*   wgn_ques_feature  - car of X, passed through get_c_string            */
/*   wgn_ques_oper_str - car of the cdr of X, passed through get_c_string */
/*   wgn_ques_operand  - car of the cdr of the cdr of X, left as a LISP   */
#define wgn_ques_feature(X)  (get_c_string(car(X)))
#define wgn_ques_oper_str(X) (get_c_string(car(cdr(X))))
#define wgn_ques_operand(X)  (car(cdr(cdr(X))))
288 
289 int wagon_ask_question(LISP question, LISP value);
290 
291 int stepwise_ols(const EST_FMatrix &X,
292  const EST_FMatrix &Y,
293  const EST_StrList &feat_names,
294  float limit,
295  EST_FMatrix &coeffs,
296  const EST_FMatrix &Xtest,
297  const EST_FMatrix &Ytest,
298  EST_IVector &included,
299  float &best_score);
300 int robust_ols(const EST_FMatrix &X,
301  const EST_FMatrix &Y,
302  EST_IVector &included,
303  EST_FMatrix &coeffs);
304 int ols_apply(const EST_FMatrix &samples,
305  const EST_FMatrix &coeffs,
306  EST_FMatrix &res);
307 int ols_test(const EST_FMatrix &real,
308  const EST_FMatrix &predicted,
309  float &correlation,
310  float &rmse);
311 
312 #endif /* __WAGON_H__ */
int width(void) const
Definition: EST_Wagon.h:94
int wgn_count_field
Definition: wagon.cc:71
const WVectorVector * data
Definition: EST_Wagon.h:159
EST_String wgn_vertex_output
Definition: wagon.cc:78
wn_oper
Definition: EST_Wagon.h:96
float wgn_score_question(WQuestion &q, WVectorVector &ds)
Definition: wagon.cc:1091
int ignore(int i) const
Definition: EST_Wagon.h:90
EST_TList< WVector * > WVectorList
Definition: EST_Wagon.h:66
WDataSet wgn_dataset
Definition: wagon.cc:59
void wgn_load_datadescription(EST_String fname, LISP ignores)
Definition: wagon.cc:111
int get_no(void) const
Definition: EST_Wagon.h:128
void set_fp(const int &fp)
Definition: EST_Wagon.h:122
int samples(void) const
Definition: EST_Wagon.h:249
WQuestion(int fp, wn_oper o, EST_Val a)
Definition: EST_Wagon.h:120
void set_ignore(int i, int value)
Definition: EST_Wagon.h:91
A vector class for floating point numbers. EST_FVector x should be used instead of float *x wherever ...
Definition: EST_FMatrix.h:119
WImpurity & get_impurity(void)
Definition: EST_Wagon.h:245
int wgn_predictee
Definition: wagon.cc:73
STATIC void left(STATUS Change)
Definition: editline.c:523
void wgn_find_split(WQuestion &q, WVectorVector &ds, WVectorVector &y, WVectorVector &n)
Definition: wagon.cc:775
Discretes wgn_discretes
Definition: wagon.cc:57
void set_question(const WQuestion &q)
Definition: EST_Wagon.h:242
STATIC void right(STATUS Change)
Definition: editline.c:538
int get_yes(void) const
Definition: EST_Wagon.h:127
int wgn_min_cluster_size
Definition: wagon.cc:66
int ols_test(const EST_FMatrix &real, const EST_FMatrix &predicted, float &correlation, float &rmse)
Definition: EST_ols.cc:288
wn_oper get_op(void) const
Definition: EST_Wagon.h:130
INLINE const float & a_no_check(ssize_t n) const
read-only const access operator: without bounds checking
Definition: EST_TVector.h:254
EST_Track wgn_UnitTrack
Definition: wagon.cc:64
friend std::ostream & operator<<(std::ostream &st, const EST_TVector< float > &m)
print out vector.
Definition: EST_TVector.h:310
int wgn_held_out
Definition: wagon.cc:67
EST_SuffStats ** trajectory
Definition: EST_Wagon.h:158
EST_TVector< WVector * > WVectorVector
Definition: EST_Wagon.h:67
const float & a_check(ssize_t n) const
read-only const access operator: with bounds checking
wn_dtype
Definition: EST_Wagon.h:70
EST_String wgn_opt_param
Definition: wagon.cc:77
const float & a(ssize_t n) const
Definition: EST_TVector.h:269
EST_IList members
Definition: EST_Wagon.h:156
WQuestion & get_question(void)
Definition: EST_Wagon.h:246
EST_FList member_counts
Definition: EST_Wagon.h:157
int robust_ols(const EST_FMatrix &X, const EST_FMatrix &Y, EST_IVector &included, EST_FMatrix &coeffs)
Definition: EST_ols.cc:87
int get_fp(void) const
Definition: EST_Wagon.h:129
const EST_String & token(void) const
Definition: EST_Wagon.h:218
void set_subnodes(WNode *l, WNode *r)
Definition: EST_Wagon.h:240
void copy(const WImpurity &s)
Definition: EST_Wagon.h:166
WVector(int n)
Definition: EST_Wagon.h:59
int width
Definition: EST_Wagon.h:161
void set_flt_val(int n, float f)
Definition: EST_Wagon.h:63
void set_operand1(const EST_Val &a)
Definition: EST_Wagon.h:124
WNode()
Definition: EST_Wagon.h:236
float wgn_balance
Definition: wagon.cc:76
INLINE ssize_t length() const
number of items in vector.
Definition: EST_TVector.h:249
void set_no(const int &n)
Definition: EST_Wagon.h:126
WNode * wagon_stepwise(float limit)
Definition: wagon.cc:1098
void set_yes(const int &y)
Definition: EST_Wagon.h:125
WDlist()
Definition: EST_Wagon.h:206
WNode * wgn_build_dlist(float &score, ostream *output)
Definition: dlist.cc:60
const EST_String & feat_name(const int &i) const
Definition: EST_Wagon.h:92
const WQuestion & question() const
Definition: EST_Wagon.h:219
void set_impurity(const WImpurity &imp)
Definition: EST_Wagon.h:241
float score() const
Definition: EST_Wagon.h:217
EST_TVector< T > & copy(EST_TVector< T > a, const EST_TList< T > &in)
EST_String wgn_count_field_name
Definition: wagon.cc:72
EST_TSimpleVector & operator=(const EST_TSimpleVector< float > &s)
assignment operator
f
Definition: EST_item_aux.cc:48
EST_FMatrix wgn_DistMatrix
Definition: wagon.cc:61
int wgn_quiet
Definition: wagon.cc:69
WDlist * add_to_dlist(WDlist *l, WDlist *a)
Definition: dlist.cc:194
void set_score(const float &f)
Definition: EST_Wagon.h:134
getString int
Definition: EST_item_aux.cc:50
~WQuestion()
Definition: EST_Wagon.h:119
EST_String wgn_predictee_name
Definition: wagon.cc:74
float correlation(EST_Track &a, EST_Track &b, ssize_t channel)
void reset(void)
reset internal values
EST_Track wgn_VertexTrack
Definition: wagon.cc:62
const EST_IList & get_operandl(void) const
Definition: EST_Wagon.h:132
wnim_type
Definition: EST_Wagon.h:139
const EST_Val get_operand1(void) const
Definition: EST_Wagon.h:131
WVectorVector & get_data(void)
Definition: EST_Wagon.h:239
void set_question(const WQuestion &q)
Definition: EST_Wagon.h:214
float summary_results(WNode &tree, ostream *output)
Definition: wagon.cc:215
float get_score(void) const
Definition: EST_Wagon.h:133
#define X
int wagon_ask_question(LISP question, LISP value)
Definition: wagonint.cc:49
~WDlist()
Definition: EST_Wagon.h:212
int stepwise_ols(const EST_FMatrix &X, const EST_FMatrix &Y, const EST_StrList &feat_names, float limit, EST_FMatrix &coeffs, const EST_FMatrix &Xtest, const EST_FMatrix &Ytest, EST_IVector &included, float &best_score)
EST_DiscreteProbDistribution & pd()
Definition: EST_Wagon.h:190
void wgn_load_dataset(WDataSet &ds, EST_String fname)
Definition: wagon.cc:118
int get_int_val(int n) const
Definition: EST_Wagon.h:60
int samples(void) const
Definition: EST_Wagon.h:93
float get_flt_val(int n) const
Definition: EST_Wagon.h:61
WDataSet()
Definition: EST_Wagon.h:85
void set_int_val(int n, int i)
Definition: EST_Wagon.h:62
float wgn_float_range_split
Definition: wagon.cc:75
float score
Definition: EST_Wagon.h:160
void set_oper(const wn_oper &o)
Definition: EST_Wagon.h:123
int tree
Definition: rxp.c:21
EST_Track wgn_VertexFeats
Definition: wagon.cc:63
#define WGN_HUGE_VAL
Definition: EST_Wagon.h:54
LISP fp
Definition: kkcompile.cc:63
int ftype(const int &i) const
Definition: EST_Wagon.h:89
~WNode()
Definition: EST_Wagon.h:237
INLINE ssize_t n() const
number of items in vector.
Definition: EST_TVector.h:251
void set_best(const EST_String &t, int freq, int samples)
Definition: EST_Wagon.h:215
WQuestion(const WQuestion &s)
Definition: EST_Wagon.h:115
WDataSet wgn_test_dataset
Definition: wagon.cc:60
void set_score(float s)
Definition: EST_Wagon.h:213
int wgn_prune
Definition: wagon.cc:68
int wgn_verbose
Definition: wagon.cc:70
int ols_apply(const EST_FMatrix &samples, const EST_FMatrix &coeffs, EST_FMatrix &res)
Definition: EST_ols.cc:185
WNode * wgn_build_tree(float &score)
Definition: wagon.cc:239
wnim_type type(void) const
Definition: EST_Wagon.h:187