Edinburgh Speech Tools  2.1-release
wfst_run_main.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : December 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* Run a WFST on some data, either as a recognizer or as a transducer */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <cstdio>
41 #include <iostream>
42 #include <fstream>
43 #include <cstring>
44 #include "EST.h"
45 #include "EST_simplestats.h"
46 #include "EST_WFST.h"
47 
48 using namespace std;
49 
/* Forward declaration; the definition follows main(). */
static int wfst_run_main(int argc, char **argv);


/**
 * Program entry point.
 *
 * Hands the command line to wfst_run_main() and then terminates the
 * process with status 0.
 *
 * @param argc  number of command-line arguments
 * @param argv  command-line argument vector
 * @return never returns normally; the process leaves through exit()
 */
int main(int argc, char **argv)
{
    // The status returned by wfst_run_main() is discarded: the process
    // exit code is 0 regardless of what the run reported.
    (void)wfst_run_main(argc, argv);

    exit(0);
}
61 
62 static int wfst_run_main(int argc, char **argv)
63 {
64  // recognize/transduce
65  EST_Option al;
66  EST_StrList files;
67  EST_Litem *f;
68  EST_String wfstfile;
69  FILE *ofd;
70  int r;
71  EST_SuffStats R;
72  float sumlogp=0,isumlogp;
73  float count=0,icount;
74 
76  (argc, argv,
77  EST_String("[WFSTFILE] [input file0] ... [-o output file]\n")+
78  "Summary: Recognize/transduce using a WFST on data\n"+
79  "-wfst <ifile> The WFST to use\n"+
80  "-transduce Transduce input to output (default)\n"+
81  "-recog Recognize input consists of pairs\n"+
82  "-cumulate_into <ofile>\n"+
83  " Cumulate transitions to give new weights\n"+
84  " save new WFST into ofile\n"+
85  "-itype <string> char or token\n"+
86  "-quiet No extraneous messages\n"+
87  "-perplexity Calculate perplexity on given data set\n"+
88  "-heap <int> {210000}\n"+
89  " Set size of Lisp heap, needed for large wfsts\n"+
90  "-o <ofile> Output file for transduced forms\n",
91  files, al);
92 
93  if (al.present("-o"))
94  {
95  if ((ofd=fopen(al.val("-o"),"w")) == NULL)
96  EST_error("can't open output file for writing \"%s\"",
97  (const char *)al.val("-o"));
98  }
99  else
100  ofd = stdout;
101 
102  if (al.present("-wfst"))
103  wfstfile = al.val("-wfst");
104  else
105  EST_error("no WFST specified");
106 
107  siod_init(al.ival("-heap"));
108 
109  EST_WFST wfst;
110  EST_TokenStream ts;
111 
112  if (wfst.load(wfstfile) != format_ok)
113  EST_error("failed to read WFST from \"%s\"",
114  (const char *)wfstfile);
115 
116  if (al.present("-cumulate_into"))
117  wfst.start_cumulate();
118 
119  for (f=files.head(); f != 0; f=f->next())
120  {
121  if (files(f) == "-")
122  ts.open(stdin,FALSE);
123  else
124  if (ts.open(files(f)) != 0)
125  EST_error("failed to read WFST data file from \"%s\"",
126  (const char *)files(f));
127 
128  // Not the best way to input things but will do the the present
129  while(!ts.eof())
130  {
131  EST_StrList ostrs,istrs;
132  do
133  istrs.append(ts.get());
134  while((!ts.eof()) && (!ts.eoln()));
135 
136  if (al.present("-recog"))
137  {
138  if (al.present("-perplexity"))
139  {
140  r = recognize_for_perplexity(wfst,istrs,
141  al.present("-quiet"),
142  icount,
143  isumlogp);
144  if (r)
145  {
146  count += icount;
147  sumlogp += isumlogp;
148  }
149  }
150  else
151  r = recognize(wfst,istrs,al.present("-quiet"));
152  }
153  else
154  {
155  r = transduce(wfst,istrs,ostrs);
156  if (r)
157  {
158  cout << ostrs;
159  cout << endl;
160  }
161  }
162  R += r;
163 
164  if (!al.present("-quiet"))
165  {
166  if (r)
167  cout << "OK." << endl;
168  else
169  cout << "failed." << endl;
170  }
171  }
172  ts.close();
173  }
174 
175  if (al.present("-cumulate_into"))
176  {
177  wfst.stop_cumulate();
178  if (wfst.save(al.val("-cumulate_into")) != write_ok)
179  EST_error("failed to write cumulated WFST to \"%s\"",
180  (const char *)al.val("-cumulate_into"));
181  }
182 
183  printf("total %d OK %f%% failed %f%%\n",
184  (int)R.samples(),R.mean()*100,(1-R.mean())*100);
185  if (al.present("-perplexity"))
186  {
187  printf("perplexity is %f\n", pow(float(2.0),float(-1.0 * (sumlogp/count))));
188  }
189 
190  if (ofd != stdout)
191  fclose(ofd);
192 
193  if (R.mean() == 1) // true is *all* files were recognized
194  return 0;
195  else
196  return -1;
197 }
198 
int transduce(const EST_WFST &wfst, const EST_StrList &in, EST_StrList &out)
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:499
a class representing a weighted finite-state transducer
Definition: EST_WFST.h:154
int ival(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:82
double mean(void) const
mean of currently accumulated values
void close(void)
Close stream.
Definition: EST_Token.cc:419
EST_UItem * next()
Definition: EST_UList.h:55
int open(const EST_String &filename)
Open an EST_TokenStream for a file.
Definition: EST_Token.cc:213
int recognize(const EST_WFST &wfst, const EST_StrList &in, int quiet)
int eof()
end of file
Definition: EST_Token.h:362
int main(int argc, char **argv)
The file was written successfully.
#define FALSE
Definition: EST_bool.h:119
NULL
Definition: EST_WFST.cc:55
#define EST_error
Definition: EST_error.h:104
f
Definition: EST_item_aux.cc:48
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
double samples(void)
number of samples in set
#define format_ok
int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
EST_UItem * head() const
Definition: EST_UList.h:97
int siod_init(int heap_size=DEFAULT_HEAP_SIZE)
Definition: siod.cc:58
EST_String
int parse_command_line(int argc, char *argv[], const EST_String &usage, EST_StrList &files, EST_Option &al, int make_stdio=1)
Definition: cmd_line.cc:101
int recognize_for_perplexity(const EST_WFST &wfst, const EST_StrList &in, int quiet, float &count, float &sumlogp)
int eoln()
end of line
Definition: EST_Token.cc:832