// Forward declaration of wagon_main, which is defined later in this file.
// main() forwards its argc/argv to it unchanged.
58 static int wagon_main(
int argc,
char **argv);
61 int main(
int argc,
char **argv)
64 wagon_main(argc,argv);
77 wgn_VertexFeats.
a(static_cast<ssize_t>(0),i) = 0.0;
92 wgn_VertexFeats.
a(static_cast<ssize_t>(0),i) = 1.0;
94 }
else if ((ws ==
",") || (ws ==
""))
97 wgn_VertexFeats.
a(static_cast<ssize_t>(0),s) = 1.0;
104 for (i=s; i<=e && i<wgn_VertexFeats.
num_channels(); i++)
105 wgn_VertexFeats.
a(static_cast<ssize_t>(0),i) = 1.0;
108 printf(
"wagon: track_feats invalid: %s at position %lld\n",
109 (
const char *)wagon_track_features,
118 static int wagon_main(
int argc,
char **argv)
124 ostream *wgn_coutput = 0;
125 float stepwise_limit = 0;
126 int feats_start=0, feats_end=0;
132 "Summary: CART building program\n"+
133 "-desc <ifile> Field description file\n"+
134 "-data <ifile> Datafile, one vector per line\n"+
135 "-stop <int> {50} Minimum number of examples for leaf nodes\n"+
136 "-test <ifile> Datafile to test tree on\n"+
137 "-frs <float> {10} Float range split, number of partitions to\n"+
138 " split a float feature range into\n"+
139 "-dlist Build a decision list (rather than tree)\n"+
140 "-dtree Build a decision tree (rather than list) default\n"+
141 "-output <ofile> \n"+
142 "-o <ofile> File to save output tree in\n"+
143 "-distmatrix <ifile>\n"+
144 " A distance matrix for clustering\n"+
146 " track for vertex indices\n"+
147 "-track_start <int>\n"+
148 " start channel vertex indices\n"+
149 "-track_end <int>\n"+
150 " end (inclusive) channel for vertex indices\n"+
151 "-track_feats <string>\n"+
152 " Track features to use, comma separated list\n"+
153 " with feature numbers and/or ranges, 0 start\n"+
154 "-unittrack <ifile>\n"+
155 " track for unit start and length in vertex track\n"+
156 "-quiet No questions printed during building\n"+
157 "-verbose Lost of information printing during build\n"+
158 "-predictee <string>\n"+
159 " name of field to predict (default is first field)\n"+
160 "-ignore <string>\n"+
161 " Filename or bracket list of fields to ignore\n"+
162 "-count_field <string>\n"+
163 " Name of field containing count weight for samples\n"+
164 "-stepwise Incrementally find best features\n"+
165 "-swlimit <float> {0.0}\n"+
166 " Percentage necessary improvement for stepwise,\n"+
167 " may be negative.\n"+
168 "-swopt <string> Parameter to optimize for stepwise, for \n"+
169 " classification options are correct or entropy\n"+
170 " for regression options are rmse or correlation\n"+
171 " correct and correlation are the defaults\n"+
172 "-balance <float> For derived stop size, if dataset at node, divided\n"+
173 " by balance is greater than stop it is used as stop\n"+
174 " if balance is 0 (default) always use stop as is.\n"+
175 "-vertex_output <string> Output <mean> or <best> of cluster\n"+
176 "-held_out <int> Percent to hold out for pruning\n"+
177 "-heap <int> {210000}\n"+
178 " Set size of Lisp heap, should not normally need\n"+
179 " to be changed from its default, only with *very*\n"+
180 " large description files (> 1M)\n"+
181 "-noprune No (same class) pruning required\n",
190 cerr << argv[0] <<
": missing description and/or datafile" << endl;
191 cerr <<
"use -h for description of arguments" << endl;
205 if (al.
present(
"-count_field"))
208 stepwise_limit = al.
fval(
"-swlimit");
213 if (al.
present(
"-vertex_output"))
218 wgn_oname = al.
val(
"-o");
220 wgn_oname = al.
val(
"-output");
221 wgn_coutput =
new ofstream(wgn_oname);
224 cerr <<
"Wagon: can't open file \"" << wgn_oname <<
225 "\" for output " << endl;
235 cerr <<
"Wagon: failed to load Distance Matrix from \"" <<
236 al.
val(
"-distmatrix") <<
"\"\n" << endl;
255 ignores =
vload(ig,1);
260 if (al.
present(
"-distmatrix") &&
263 cerr <<
"wagon: distance matrix is smaller than number of training elements\n";
271 wgn_VertexFeats.
a(static_cast<ssize_t>(0),i) = 1.0;
274 if (al.
present(
"-track_start"))
276 feats_start = al.
ival(
"-track_start");
277 if ((feats_start < 0) ||
280 printf(
"wagon: track_start invalid: %d out of %d channels\n",
285 for (i=0; i<feats_start; i++)
286 wgn_VertexFeats.
a(static_cast<ssize_t>(0),i) = 0.0;
292 feats_end = al.
ival(
"-track_end");
293 if ((feats_end < feats_start) ||
296 printf(
"wagon: track_end invalid: %d between start %d out of %d channels\n",
303 wgn_VertexFeats.
a(static_cast<ssize_t>(0),i) = 0.0;
305 if (al.
present(
"-track_feats"))
308 set_Vertex_Feats(wgn_VertexFeats,wagon_track_features);
336 cerr <<
"Wagon: unknown operation, not tree or list" << endl;
342 *wgn_coutput << *
tree;
347 if (wgn_coutput != &cout)
void set_WhiteSpaceChars(const EST_String &ws)
set which characters are to be treated as whitespace
EST_TokenStream & get(EST_Token &t)
get next token in stream
EST_String wgn_vertex_output
void wgn_load_datadescription(EST_String fname, LISP ignores)
int ival(const EST_String &rkey, int m=1) const
int num_channels() const
return number of channels in track
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
const EST_String & whitespace()
ssize_t num_rows() const
return number of rows
float fval(const EST_String &rkey, int m=1) const
void resize(ssize_t num_frames, int num_channels, bool preserve=1)
void set_PrePunctuationSymbols(const EST_String &ps)
set which characters are to be treated as (pre) punctuation
int open_string(const EST_String &newbuffer)
open an EST_TokenStream for a string rather than a file
void set_PunctuationSymbols(const EST_String &ps)
set which characters are to be treated as (post) punctuation
LISP vload(const char *fname, long cflag)
EST_read_status load(const EST_String name, float ishift=0.0, float startt=0.0)
int main(int argc, char **argv)
float & a(ssize_t i, int c=0)
WNode * wagon_stepwise(float limit)
WNode * wgn_build_dlist(float &score, ostream *output)
LISP read_from_string(const char *)
EST_String wgn_count_field_name
EST_read_status load(const EST_String &filename)
Load from file (ascii or binary as defined in file)
EST_FMatrix wgn_DistMatrix
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
EST_String wgn_predictee_name
EST_Track wgn_VertexTrack
float summary_results(WNode &tree, ostream *output)
EST_FilePos filepos(void) const
current file position in EST_TokenStream
const EST_String & string() const
int present(const K &rkey) const
Returns true if key is present.
void wgn_load_dataset(WDataSet &ds, EST_String fname)
int siod_init(int heap_size=DEFAULT_HEAP_SIZE)
float wgn_float_range_split
EST_Track wgn_VertexFeats
WDataSet wgn_test_dataset
int parse_command_line(int argc, char *argv[], const EST_String &usage, EST_StrList &files, EST_Option &al, int make_stdio=1)
WNode * wgn_build_tree(float &score)