Edinburgh Speech Tools  2.1-release
ols_main.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : January 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* Ordinary least squares */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <iostream>
41 #include <fstream>
42 #include <cstring>
43 #include "EST_Wagon.h"
44 #include "EST_multistats.h"
45 #include "EST_cmd_line.h"
46 
47 using namespace std;
48 
// Fills X (constant 1 in column 0, then the dataset fields) and Y (field 0)
// from the rows of dataset d; defined at the end of this file.
49 static void load_ols_data(EST_FMatrix &X, EST_FMatrix &Y, WDataSet &d);
// Does all the work of the program; main() simply returns its result.
50 static int ols_main(int argc, char **argv);
51 
52 int main(int argc, char **argv)
53 {
54  return ols_main(argc,argv);
55 }
56 
57 static int ols_main(int argc, char **argv)
58 {
59  // Top level function loads in sample data and finds coefficients
60  EST_Option al;
61  EST_StrList files;
62  EST_String ofile = "-";
63  WDataSet dataset,test_dataset;
64  EST_FMatrix coeffs;
65  EST_FMatrix X,Y,Xtest,Ytest;
66  LISP ignores = NIL;
67 
69  (argc, argv,
70  EST_String("[options]\n")+
71  "Summary: Linear Regression by ordinary least squares (defaults in {})\n"+
72  "-desc <ifile> Field description file\n"+
73  "-data <ifile> Datafile, one vector per line\n"+
74  "-test <ifile> Datafile, for testing\n"+
75  "-robust Robust, may take longer\n"+
76  "-stepwise Order the features by contribution,\n"+
77  " implies robust.\n"+
78  "-swlimit <float> {0.0}\n"+
79  " Percentage necessary improvement for stepwise\n"+
80  "-quiet No summary\n"+
81  "-o <ofile> \n"+
82  "-output <ofile> Output file for coefficients\n"+
83  "-ignore <string> Filename or bracket list of fields to ignore\n",
84  files, al);
85 
86 
87  if (al.present("-output"))
88  ofile = al.val("-output");
89  if (al.present("-o"))
90  ofile = al.val("-o");
91 
92  siod_init();
93 
94  if (al.present("-ignore"))
95  {
96  EST_String ig = al.val("-ignore");
97  if (ig[0] == '(')
98  ignores = read_from_string(ig);
99  else
100  ignores = vload(ig,1);
101  }
102 
103  // Load in the data
104  if (!al.present("-desc"))
105  {
106  cerr << "ols: no description file specified\n";
107  return -1;
108  }
109  else
110  {
111  dataset.load_description(al.val("-desc"),ignores);
112  dataset.ignore_non_numbers();
113  }
114  if (!al.present("-data"))
115  {
116  cerr << "ols: no data file specified\n";
117  return -1;
118  }
119  else
120  wgn_load_dataset(dataset,al.val("-data"));
121  if (al.present("-test"))
122  {
123  test_dataset.load_description(al.val("-desc"),ignores);
124  test_dataset.ignore_non_numbers();
125  wgn_load_dataset(test_dataset,al.val("-test"));
126  load_ols_data(Xtest,Ytest,test_dataset);
127  }
128  else
129  // No test data specified so use training data
130  load_ols_data(Xtest,Ytest,dataset);
131 
132  load_ols_data(X,Y,dataset);
133 
134  if (al.present("-stepwise"))
135  {
136  EST_StrList names;
137  float swlimit = al.fval("-swlimit");
138  EST_IVector included;
139  int i;
140 
141  names.append("Intercept");
142  for (i=1; i < dataset.width(); i++)
143  names.append(dataset.feat_name(i));
144 
145  included.resize(X.num_columns());
146  included[0] = TRUE; // always guarantee interceptor
147  for (i=1; i<included.length(); i++)
148  {
149  if (dataset.ignore(i) == TRUE)
150  included.a_no_check(i) = OLS_IGNORE;
151  else
152  included.a_no_check(i) = FALSE;
153  }
154 
155  if (!stepwise_ols(X,Y,names,swlimit,coeffs,Xtest,Ytest,included))
156  {
157  cerr << "OLS: failed stepwise ols" << endl;
158  return -1;
159  }
160  }
161  else if (al.present("-robust"))
162  {
163  EST_IVector included;
164  int i;
165 
166  included.resize(X.num_columns());
167  included[0] = TRUE; // always guarantee interceptor
168  for (i=1; i<included.length(); i++)
169  {
170  if (dataset.ignore(i) == TRUE)
171  included.a_no_check(i) = OLS_IGNORE;
172  else
173  included.a_no_check(i) = TRUE;
174  }
175 
176  if (!robust_ols(X,Y,included,coeffs))
177  {
178  cerr << "OLS: failed robust ols" << endl;
179  return -1;
180  }
181  }
182  else if (!ols(X,Y,coeffs))
183  {
184  cerr << "OLS: failed no pseudo_inverse" << endl;
185  return -1;
186  }
187 
188  if (coeffs.save(ofile) != write_ok)
189  {
190  cerr << "OLS: failed to save coefficients in \"" << ofile << "\""
191  << endl;
192  return -1;
193  }
194 
195  if (!al.present("-quiet"))
196  {
197  EST_FMatrix pred;
198  float cor,rmse;
199 
200  ols_apply(Xtest,coeffs,pred);
201  ols_test(Ytest,pred,cor,rmse);
202 
203  printf(";; RMSE %f Correlation is %f\n",rmse,cor);
204  }
205 
206  return 0;
207 }
208 
209 static void load_ols_data(EST_FMatrix &X, EST_FMatrix &Y, WDataSet &d)
210 {
211  EST_Litem *p;
212  int n,m;
213 
214  X.resize(d.length(),d.width());
215  Y.resize(d.length(),1);
216 
217  for (n=0,p=d.head(); p != 0; p=p->next(),n++)
218  {
219  Y.a_no_check(n,0) = d(p)->get_flt_val(0);
220  X.a_no_check(n,0) = 1;
221  for (m=1; m < d.width(); m++)
222  {
223  if (d.ignore(m))
224  {
225  X.a_no_check(n,m) = 0;
226  }
227  else
228  X.a_no_check(n,m) = d(p)->get_flt_val(m);
229  }
230  }
231 
232 }
int ols_test(const EST_FMatrix &real, const EST_FMatrix &predicted, float &correlation, float &rmse)
Definition: EST_ols.cc:288
int width(void) const
Definition: EST_Wagon.h:94
int ignore(int i) const
Definition: EST_Wagon.h:90
int robust_ols(const EST_FMatrix &X, const EST_FMatrix &Y, EST_FMatrix &coeffs)
Definition: EST_ols.cc:73
int ols(const EST_FMatrix &X, const EST_FMatrix &Y, EST_FMatrix &coeffs)
Definition: EST_ols.cc:59
ssize_t num_columns() const
return number of columns
Definition: EST_TMatrix.h:179
#define NIL
Definition: siod_defs.h:92
INLINE const T & a_no_check(ssize_t n) const
read-only const access operator: without bounds checking
Definition: EST_TVector.h:254
float fval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:104
#define OLS_IGNORE
EST_UItem * next()
Definition: EST_UList.h:55
EST_write_status save(const EST_String &filename, const EST_String &type=EST_FMatrix::default_file_type)
Save in file (ascii or binary)
Definition: EST_FMatrix.cc:341
int main(int argc, char **argv)
Definition: ols_main.cc:52
LISP vload(const char *fname, long cflag)
Definition: slib_file.cc:632
INLINE ssize_t length() const
number of items in vector.
Definition: EST_TVector.h:249
int stepwise_ols(const EST_FMatrix &X, const EST_FMatrix &Y, const EST_StrList &feat_names, float limit, EST_FMatrix &coeffs, const EST_FMatrix &Xtest, const EST_FMatrix &Ytest, EST_IVector &included)
Definition: EST_ols.cc:199
The file was written successfully.
LISP read_from_string(const char *)
Definition: slib_str.cc:65
int ols_apply(const EST_FMatrix &samples, const EST_FMatrix &coeffs, EST_FMatrix &res)
Definition: EST_ols.cc:185
const EST_String & feat_name(const int &i) const
Definition: EST_Wagon.h:92
#define FALSE
Definition: EST_bool.h:119
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
int length() const
Definition: EST_UList.cc:57
void ignore_non_numbers()
Definition: wagon_aux.cc:162
#define X
int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
void wgn_load_dataset(WDataSet &ds, EST_String fname)
Definition: wagon.cc:118
EST_UItem * head() const
Definition: EST_UList.h:97
int siod_init(int heap_size=DEFAULT_HEAP_SIZE)
Definition: siod.cc:58
INLINE const T & a_no_check(ssize_t row, ssize_t col) const
const access with no bounds check, care recommended
Definition: EST_TMatrix.h:182
void resize(int rows, int cols, int set=1)
resize matrix
EST_String
#define TRUE
Definition: EST_bool.h:118
int parse_command_line(int argc, char *argv[], const EST_String &usage, EST_StrList &files, EST_Option &al, int make_stdio=1)
Definition: cmd_line.cc:101
void load_description(const EST_String &descfname, LISP ignores)
Definition: wagon_aux.cc:181
void resize(int n, int set=1)
resize vector