Edinburgh Speech Tools  2.1-release
rgcompile.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : December 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* A Regular grammar compiler, its pretty free about the grammar */
38 /* Actually it will take full context free grammars and convert them */
39 /* up to a specified rewrite depth */
40 /* */
41 /* Based loosely on "Finite State Machines from Features Grammars" by */
42 /* Black, International Workshop of Parsing Technologies, CMU 89 */
43 /* */
44 /*=======================================================================*/
45 #include <iostream>
46 #include "EST_cutils.h"
47 #include "EST_WFST.h"
48 
49 using namespace std;
50 
51 static LISP find_rewrites(LISP rules, LISP terms, LISP nonterms);
52 static LISP rg_find_nt_ts(LISP rules,LISP sets);
53 static LISP prod_join(LISP n, LISP p);
54 static int production_index(LISP state,
56  int proposed);
57 
58 void rgcompile(LISP rg, EST_WFST &all_wfst)
59 {
60  // Build a transducer from given regular grammar.
61  LISP nt_ts,nonterms,terms,rewrites;
62  LISP sets=siod_nth(2,rg);
63  LISP rules=siod_nth(3,rg);
64 
65  nt_ts = rg_find_nt_ts(rules,sets);
66  nonterms = car(nt_ts);
67  terms = cdr(nt_ts);
68 
69  rewrites = find_rewrites(rules,terms,nonterms);
70 
71  if (rewrites == NIL)
72  return; // left recursive or no rules
73 
74  all_wfst.build_from_rg(terms,terms,
75  car(car(rules)), // distinguished symbol
76  rewrites,
77  sets,terms,25);
78 
79 }
80 
81 static LISP find_rewrites(LISP rules, LISP terms, LISP nonterms)
82 {
83  // Find the full rewrites of each nonterminal until a terminal
84  // appears as the first item
85  LISP nt,r;
86  LISP rewrites = NIL;
87  (void)terms;
88 
89  // got lazy and haven't done this recursively yet ...
90  for (nt=nonterms; nt != NIL; nt=cdr(nt))
91  {
92  LISP nn = NIL;
93  for (r=rules; r != NIL; r=cdr(r))
94  if (car(car(r)) == car(nt)) // depend on symbols being eq
95  nn = cons(cdr(cdr(car(r))),nn);
96  rewrites = cons(cons(car(nt),nn),rewrites);
97  }
98 
99  return rewrites;
100 }
101 
102 static LISP rg_find_nt_ts(LISP rules,LISP sets)
103 {
104  // Find the alphabets used in the rules
105  LISP terms=NIL,nonterms=NIL,r,s,set,t;
106 
107  for (r=rules; r != NIL; r=cdr(r))
108  if (!siod_member_str(get_c_string(car(car(r))),nonterms))
109  nonterms = cons(car(car(r)),nonterms);
110 
111  for (r=rules; r != NIL; r=cdr(r))
112  for (s=cdr(cdr(car(r))); s != NIL; s=cdr(s))
113  if ((!siod_member_str(get_c_string(car(s)),terms)) &&
114  (!siod_member_str(get_c_string(car(s)),nonterms)) &&
115  (!siod_assoc_str(get_c_string(car(s)),sets)))
116  terms = cons(car(s),terms);
117  else if ((set=siod_assoc_str(get_c_string(car(s)),sets)))
118  {
119  for (t=cdr(set); t != 0; t=cdr(t))
120  if (!siod_member_str(get_c_string(car(t)),terms))
121  terms = cons(car(t),terms);
122  }
123 
124  return cons(nonterms,terms);
125 }
126 
127 
128 void EST_WFST::build_from_rg(LISP inalpha, LISP outalpha,
129  LISP distinguished, LISP rewrites,
130  LISP sets, LISP terms,
131  int max_depth)
132 {
133  // This is sort of similar to determinising in that the "state"
134  // is represented by a list of numbers, i.e. the remainder of
135  // of production
136  LISP current, start_state, remainder, set, new_prod;
137  int ns, current_state;
138  const char *current_sym;
139  LISP agenda = NIL;
141  (void)max_depth;
142  int c=0;
143 
144  clear();
145  init(inalpha,outalpha);
146  int i_epsilon = in_epsilon();
147  int o_epsilon = out_epsilon();
148 
149  // Create a starting state and add it to this WFST
150  p_start_state = add_state(wfst_nonfinal);
151  start_state = cons(flocons((double)p_start_state),
152  cons(distinguished,NIL));
153 
154  production_index(start_state,index,p_start_state);
155 
156  agenda = cons(start_state,agenda); // initialize agenda
157 
158  while (agenda != NIL)
159  {
160  current = car(agenda);
161  agenda = cdr(agenda);
162  current_state = get_c_int(car(current));
163  current_sym = get_c_string(car(cdr(current)));
164  remainder = cdr(cdr(current));
165  if ((c % 1000)== 0)
166  cout << summary() << " Agenda " << siod_llength(agenda) << endl;
167  c++;
168 
169  if ((set=siod_assoc_str(current_sym,sets)))
170  {
171  ns = production_index(remainder,index,p_num_states);
172  // add transitions for each member of set
173  for (LISP s=cdr(set); s != NIL; s=cdr(s))
174  p_states[current_state]
175  ->add_transition(0.0, // no weights
176  ns,
177  p_in_symbols.name(get_c_string(car(s))),
178  p_out_symbols.name(get_c_string(car(s))));
179  if (remainder == NIL)
180  add_state(wfst_final);
181  else if (ns == p_num_states) // its a new remainder
182  {
183  add_state(wfst_nonfinal);
184  agenda = cons(cons(flocons(ns),remainder),agenda);
185  }
186  }
187  else if (siod_member_str(current_sym,terms))
188  {
189  ns = production_index(remainder,index,p_num_states);
190  // Add transition for this terminal symbol
191  p_states[current_state]
192  ->add_transition(0.0, // no weights
193  ns,
194  p_in_symbols.name(current_sym),
195  p_out_symbols.name(current_sym));
196  if (remainder == NIL)
197  add_state(wfst_final);
198  else if (ns == p_num_states) // its a new remainder
199  {
200  add_state(wfst_nonfinal);
201  agenda = cons(cons(flocons(ns),remainder),agenda);
202  }
203  }
204  else // its a non-terminal so simply rewrite
205  {
206  for (LISP p=cdr(siod_assoc_str(current_sym,rewrites));
207  p != NIL;
208  p=cdr(p))
209  {
210  new_prod = prod_join(car(p),remainder);
211  ns = production_index(new_prod,index,p_num_states);
212  p_states[current_state]
213  ->add_transition(0.0, // no weights
214  ns,i_epsilon,o_epsilon);
215  if (ns == p_num_states) // its a new remainder
216  {
217  if (new_prod == NIL)
218  add_state(wfst_final);
219  else
220  {
221  add_state(wfst_nonfinal);
222  agenda = cons(cons(flocons(ns),new_prod),agenda);
223  }
224  }
225  }
226  }
227  }
228 }
229 
230 static int production_index(LISP state,
232  int proposed)
233 {
234  // Returns proposed if ms is not already in index, otherwise
235  // returns the value that was proposed when it was first new.
236 
237  // I'll have to make this more efficient in future.
238  EST_String istring("");
239  LISP p;
240 
241  for (p=state; p != NIL; p = cdr(p))
242  istring += EST_String(get_c_string(car(p))) + " ";
243 
244  int ns,found;
245 
246  for (p=state; p != NIL; p = cdr(p))
247  istring += EST_String(get_c_string(car(p))) + " ";
248 
249  ns = index.val(istring,found);
250  if (found)
251  return ns;
252  else
253  {
254  index.add_item(istring,proposed);
255  return proposed;
256  }
257 
258 }
259 
260 static LISP prod_join(LISP n, LISP p)
261 {
262  if (n == NIL)
263  return p;
264  else
265  return cons(car(n),prod_join(cdr(n),p));
266 }
a call representing a weighted finite-state transducer
Definition: EST_WFST.h:154
long int get_c_int(LISP x)
Definition: slib.cc:1850
#define NIL
Definition: siod_defs.h:92
int siod_llength(LISP list)
Definition: siod.cc:202
A specialised hash table for when the key is an EST_String.
Definition: EST_THash.h:304
LISP siod_assoc_str(const char *key, LISP alist)
Definition: siod.cc:125
int index(EST_TList< T > &l, T &val, bool(*eq)(const EST_UItem *, const EST_UItem *)=NULL)
Definition: EST_TList.h:286
LISP siod_nth(int nth, LISP list)
Definition: siod.cc:214
const char * get_c_string(LISP x)
Definition: slib.cc:638
LISP cons(LISP x, LISP y)
Definition: slib_list.cc:97
V & val(const K &key, int &found) const
Definition: EST_THash.cc:114
void rgcompile(LISP rg, EST_WFST &all_wfst)
Definition: rgcompile.cc:58
void build_from_rg(LISP inalpha, LISP outalpha, LISP distinguished, LISP rewrites, LISP sets, LISP terms, int max_depth)
Definition: rgcompile.cc:128
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
Definition: EST_THash.cc:167
LISP flocons(double x)
Definition: slib.cc:673
LISP car(LISP x)
Definition: slib_list.cc:115
EST_String
LISP siod_member_str(const char *key, LISP list)
Definition: siod.cc:167
LISP cdr(LISP x)
Definition: slib_list.cc:124