Edinburgh Speech Tools  2.1-release
wfst_build_main.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : November 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* Build a WFST from some base: */
37 /* 1 a set of context dependent rewrite rules using the */
38 /* algorithms from "An Efficient Compiler for Weighted Rewrite */
39 /* Rules", by Mehryar Mohri and Richard Sproat ACL 1996 */
40 /* and information from the techniques in Ritchie et al. 1992 */
41 /* 2 A regular grammar (but can be written as a CFG as long as it */
42 /* contains no centre embedding) */
43 /* 3 A regular expression */
44 /* 4 lts rules (but that doesn't work yet) */
45 /* */
46 /* or apply some operator on existing wfst(s): compose, concatenate, */
47 /* difference, union, */
48 /* */
49 /* Also allow determinizing and minimization as required */
50 /* */
51 /*=======================================================================*/
52 #include <cstdlib>
53 #include <cstdio>
54 #include <iostream>
55 #include <fstream>
56 #include <cstring>
57 #include "EST.h"
58 #include "EST_WFST.h"
59 
60 using namespace std;
61 
// Forward declaration: does all the work of the wfst_build program.
static int wfst_build_main(int argc, char **argv);


// Program entry point.
//
// Passes the command line straight to wfst_build_main() and uses the value
// it returns as the process exit status, instead of discarding that value
// and unconditionally exiting with 0.
int main(int argc, char **argv)
{
    return wfst_build_main(argc, argv);
}
73 
74 static int wfst_build_main(int argc, char **argv)
75 {
76  // Top level function generates a WFST from rules
77  EST_Option al;
78  EST_StrList files;
80 
82  (argc, argv,
83  EST_String("[option] [rulefile0] [rulefile1] ...\n")+
84  "Summary: Build a weighted finite state transducer from rules/wfsts\n"+
85  "-type <string> {kk} Input rule type: kk, lts, rg, tl, compose, regex\n"+
86  " union, intersect, concat, asis\n"+
87  "-determinize Determinize WFST before saving it\n"+
88  "-detmin Determinize and minimize WFST before saving it\n"+
89  "-o <ofile> Output file for saved WFST (default stdout)\n"+
90  "-otype <string> {ascii}\n"+
91  " Output type, ascii or binary\n"+
92  "-heap <int> {210000}\n"+
93  " Set size of Lisp heap, needed for large rulesets\n"+
94  "-q Quiet mode, no summary generated\n",
95  files, al);
96 
97  if (al.present("-o"))
98  outfile = al.val("-o");
99  else
100  outfile = "-";
101 
102  siod_init(al.ival("-heap"));
103 
104  LISP ruleset;
105  LISP inalpha, outalpha;
106  EST_WFST *wfst = new EST_WFST;
107  gc_protect(&ruleset);
108 
109  if (al.val("-type") == "kk")
110  {
111  ruleset = car(vload(files(files.head()),1));
112  kkcompile(ruleset,*wfst);
113  }
114  else if (al.val("-type") == "lts")
115  {
116  ruleset = car(vload(files(files.head()),1));
117  ltscompile(ruleset,*wfst);
118  }
119  else if (al.val("-type") == "rg")
120  {
121  ruleset = car(vload(files(files.head()),1));
122  rgcompile(ruleset,*wfst);
123  }
124  else if (al.val("-type") == "tl")
125  {
126  ruleset = car(vload(files(files.head()),1));
127  tlcompile(ruleset,*wfst);
128  }
129  else if (al.val("-type") == "asis")
130  {
131  if (wfst->load(files.nth(0)) != format_ok) exit(-1);
132  }
133  else if (al.val("-type") == "compose")
134  {
135  EST_WFST a,b;
136 
137  if (files.length() != 2)
138  EST_error("compose requires two WFSTs to combine");
139 
140  if (a.load(files.nth(0)) != format_ok) exit(-1);
141  if (b.load(files.nth(1)) != format_ok) exit(-1);
142 
143  wfst->compose(a,b);
144  }
145  else if (al.val("-type") == "union")
146  {
147  EST_WFST a,b;
148 
149  if (files.length() != 2)
150  EST_error("union requires two WFSTs to combine");
151 
152  if (a.load(files.nth(0)) != format_ok) exit(-1);
153  if (b.load(files.nth(1)) != format_ok) exit(-1);
154 
155  wfst->uunion(a,b);
156  }
157  else if (al.val("-type") == "intersect")
158  {
159  EST_WFST a,b;
160 
161  if (files.length() != 2)
162  EST_error("intersect requires two WFSTs to combine");
163  if (a.load(files.nth(0)) != format_ok) exit(-1);
164  if (b.load(files.nth(1)) != format_ok) exit(-1);
165 
166  wfst->intersection(a,b);
167  }
168  else if (al.val("-type") == "concat")
169  {
170  EST_WFST a,b;
171 
172  if (files.length() != 2)
173  EST_error("concat requires two WFSTs to combine");
174  if (a.load(files.nth(0)) != format_ok) exit(-1);
175  if (b.load(files.nth(1)) != format_ok) exit(-1);
176 
177  wfst->concat(a,b);
178  }
179  else if (al.val("-type") == "difference")
180  {
181  EST_WFST a,b;
182 
183  if (files.length() != 2)
184  EST_error("difference requires two WFSTs to combine");
185  if (a.load(files.nth(0)) != format_ok) exit(-1);
186  if (b.load(files.nth(1)) != format_ok) exit(-1);
187 
188  wfst->difference(a,b);
189  }
190  else if (al.val("-type") == "regex")
191  {
192  ruleset = car(vload(files(files.head()),1));
193  inalpha = siod_nth(0,ruleset);
194  outalpha = siod_nth(1,ruleset);
195  wfst->build_from_regex(inalpha,outalpha,car(cdr(cdr(ruleset))));
196  }
197  else
198  {
199  cerr << "wfst_build: unknown rule type \"" << al.val("-type")
200  << "\"" << endl;
201  exit(-1);
202  }
203 
204  if (al.present("-determinize"))
205  {
206  EST_WFST *dwfst = new EST_WFST;
207  dwfst->determinize(*wfst);
208  if (!al.present("-q"))
209  {
210  cout << "wfst_build summary: " << endl;
211  cout << " non-deterministic wfst: " <<
212  wfst->summary() << endl;
213  cout << " deterministic wfst: " <<
214  dwfst->summary() << endl;
215  }
216  delete wfst;
217  wfst = dwfst;
218  }
219  else if (al.present("-detmin"))
220  {
221  if (!al.present("-q"))
222  {
223  cout << "wfst_build summary: " << endl;
224  cout << " non-deterministic wfst: " <<
225  wfst->summary() << endl;
226  }
227  EST_WFST *dwfst = new EST_WFST;
228  dwfst->determinize(*wfst);
229  delete wfst;
230  if (!al.present("-q"))
231  cout << " deterministic wfst: " <<
232  dwfst->summary() << endl;
233  EST_WFST *mwfst = new EST_WFST;
234  mwfst->minimize(*dwfst);
235  if (!al.present("-q"))
236  cout << " minimized wfst: " <<
237  mwfst->summary() << endl;
238  delete dwfst;
239  wfst = mwfst;
240  }
241  else
242  {
243  if (!al.present("-q"))
244  cout << "wfst_build: " << wfst->summary() << endl;
245  }
246 
247  wfst->save(outfile,al.val("-otype"));
248  delete wfst;
249  gc_unprotect(&ruleset);
250 
251  return 0;
252 }
253 
void minimize(const EST_WFST &a)
Build minimized form of a.
Definition: wfst_ops.cc:485
a class representing a weighted finite-state transducer
Definition: EST_WFST.h:154
int ival(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:82
void gc_unprotect(LISP *location)
Definition: slib.cc:759
void ltscompile(LISP lts_rules, EST_WFST &all_wfst)
Definition: ltscompile.cc:63
int main(int argc, char **argv)
void intersection(EST_TList< EST_WFST > &wl)
Definition: wfst_ops.cc:357
T & nth(int n)
return the Nth value
Definition: EST_TList.h:145
void determinize(const EST_WFST &a)
Build determinized form of a.
Definition: wfst_ops.cc:166
LISP siod_nth(int nth, LISP list)
Definition: siod.cc:214
void tlcompile(LISP tl, EST_WFST &all_wfst)
Definition: tlcompile.cc:57
LISP vload(const char *fname, long cflag)
Definition: slib_file.cc:632
void build_from_regex(LISP inalpha, LISP outalpha, LISP regex)
Definition: wfst_regex.cc:192
void kkcompile(LISP ruleset, EST_WFST &all_wfst)
Definition: kkcompile.cc:72
void compose(const EST_WFST &a, const EST_WFST &b)
Definition: wfst_ops.cc:813
void rgcompile(LISP rg, EST_WFST &all_wfst)
Definition: rgcompile.cc:58
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
?
Definition: EST_WFST.cc:353
EST_String outfile
void concat(const EST_WFST &a, const EST_WFST &b)
Definition: wfst_ops.cc:777
void uunion(EST_TList< EST_WFST > &wl)
#define EST_error
Definition: EST_error.h:104
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
int length() const
Definition: EST_UList.cc:57
#define format_ok
int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
EST_String summary() const
Definition: EST_WFST.cc:647
EST_UItem * head() const
Definition: EST_UList.h:97
void gc_protect(LISP *location)
Definition: slib.cc:791
int siod_init(int heap_size=DEFAULT_HEAP_SIZE)
Definition: siod.cc:58
LISP car(LISP x)
Definition: slib_list.cc:115
EST_String
void difference(const EST_WFST &a, const EST_WFST &b)
Definition: wfst_ops.cc:899
int parse_command_line(int argc, char *argv[], const EST_String &usage, EST_StrList &files, EST_Option &al, int make_stdio=1)
Definition: cmd_line.cc:101
EST_read_status load(const EST_String &filename)
?
Definition: EST_WFST.cc:521
LISP cdr(LISP x)
Definition: slib_list.cc:124