Edinburgh Speech Tools  2.1-release
ltscompile.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : December 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* A LTS rule compiler, where rules are contextual rewrite rules. Rules */
38 /* are of the for LC [ x ] RC => y where LC and RC are regexs on the */
39 /* tape only and x and y are simple strings on symbols. That is the */
40 /* standard form of LTS rules used in Festival. */
41 /* */
42 /*=======================================================================*/
43 #include <iostream>
44 #include "EST_cutils.h"
45 #include "EST_WFST.h"
46 
47 using namespace std;
48 
49 static LISP lts_find_feasible_pairs(LISP rules);
50 static LISP make_fp(LISP in, LISP out);
51 static LISP find_outs(LISP rule);
52 static LISP find_ins(LISP rule);
53 static LISP add_alpha(LISP n, LISP existing);
54 static LISP lts_find_alphabets(LISP rules);
55 static void ltsrule_compile(LISP inalpha, LISP outalpha,
56  LISP fp, LISP sets, LISP rule,
57  EST_WFST &a, EST_WFST &not_a);
58 static LISP analyse_rule(LISP rule);
59 static LISP expand_sets(LISP l, LISP fp, LISP sets);
60 static LISP expand_set(LISP p, LISP fp, LISP sets);
61 static LISP find_notMAP(LISP MAP,LISP fp);
62 
63 void ltscompile(LISP lts_rules, EST_WFST &all_wfst)
64 {
65  // Build a transducer from given LTS rules. Because the interpretation
66  // of these rules is normally ordered and the WFST is not, the
67  // complement of each cumulative WFST must be generated before
68  // adding the next rule
69  LISP r;
70  LISP fp; // feasible pairs, those pairs with rules (rather than IxO)
71  LISP inalpha,outalpha,alphas;
72  LISP sets=siod_nth(2,lts_rules);
73  LISP rules=siod_nth(3,lts_rules);
74  EST_WFST nots;
75 
76  alphas = lts_find_alphabets(lts_rules);
77  inalpha = car(alphas);
78  outalpha = cdr(alphas);
79 
80  fp = lts_find_feasible_pairs(rules);
81 
82  // set up an empty WFST, accepts nothing
83  all_wfst.build_from_regex(inalpha,outalpha,NIL);
84  // matches things not matched by rules, everything to begin with
85  nots.build_from_regex(inalpha,outalpha,
86  cons(rintern("*"),
87  cons(cons(rintern("or"),fp),NIL)));
88  nots.save("-");
89 
90  for (r=rules; r != NIL; r=cdr(r))
91  {
92  EST_WFST a, not_a,b,c,d;
93 
94  // all = all u (all' n r)
95  ltsrule_compile(inalpha,outalpha,fp,sets,car(r),a,not_a);
96  pprint(car(r));
97  a.save("-");
98  c.intersection(a,nots);
99  c.save("-");
100 
101  // Add for next rule
102  b.uunion(nots,not_a);
103  not_a.save("-");
104  b.save("-");
105  nots = b;
106 
107  d.uunion(all_wfst,c);
108  all_wfst = d;
109  all_wfst.save("-");
110  }
111 }
112 
113 static LISP lts_find_alphabets(LISP rules)
114 {
115  // Find the alphabets used in the rules
116  LISP r;
117  LISP in=NIL, out=NIL;
118 
119  for (r=siod_nth(3,rules); r != NIL; r=cdr(r))
120  {
121  in = add_alpha(find_ins(car(r)),in);
122  out = add_alpha(find_outs(car(r)),out);
123  }
124 
125  return cons(in,out);
126 }
127 
128 static LISP add_alpha(LISP n, LISP existing)
129 {
130  // Add values in n if not already in existing
131  LISP t;
132  LISP e=existing;
133 
134  for (t=n; t != NIL; t=cdr(t))
135  if (!siod_member_str(get_c_string(car(t)),e))
136  e = cons(car(t),e);
137 
138  return e;
139 }
140 
141 static LISP find_ins(LISP rule)
142 {
143  // find all symbols in [] in rule
144  LISP c;
145  int state=FALSE;
146  LISP ins = NIL;
147 
148  for (c=rule; c != NIL; c=cdr(c))
149  {
150  if (streq("[",get_c_string(car(c))))
151  state=TRUE;
152  else if (streq("]",get_c_string(car(c))))
153  break;
154  else if (state)
155  ins = cons(car(c),ins);
156  }
157  return reverse(ins);
158 }
159 
160 static LISP find_outs(LISP rule)
161 {
162  // find all symbols after = rule
163  LISP c;
164  int state=FALSE;
165  LISP outs = NIL;
166 
167  for (c=rule; c != NIL; c=cdr(c))
168  {
169  if (streq("=",get_c_string(car(c))))
170  state=TRUE;
171  else if (state)
172  outs = cons(car(c),outs);
173  }
174  return reverse(outs);
175 }
176 
177 static LISP lts_find_feasible_pairs(LISP rules)
178 {
179  // Find the set of pairs that have rules associated with them
180  // This effectively defines the transducer alphabet.
181  // We take the surface part in [] and the part after the = and
182  // linearly match them to form a set of pairs, padded with epsilon
183  // if necessary.
184  LISP fp = NIL;
185  LISP r;
186 
187  for (r=rules; r != NIL; r=cdr(r))
188  {
189  LISP in = find_ins(car(r));
190  LISP out = find_outs(car(r));
191 
192  LISP pairs = make_fp(in,out);
193 
194  for (LISP p=pairs; p != NIL; p=cdr(p))
195  {
196  if (!siod_member_str(get_c_string(car(p)),fp))
197  fp = cons(car(p),fp);
198  }
199  }
200  return fp;
201 }
202 
203 static LISP make_fp(LISP in, LISP out)
204 {
205  // Returns a list of pairs by matching each member of in to out
206  // padding the shorted one with _epsilon_ if necessary
207  LISP i,o;
208  LISP fp=NIL;
209  EST_String is,os;
210  int m;
211 
212  if (siod_llength(in) > siod_llength(out))
213  m = siod_llength(in);
214  else
215  m = siod_llength(out);
216 
217  for (i=in,o=out ; m > 0; --m,i=cdr(i),o=cdr(o))
218  {
219  if (i == NIL)
220  is = "__epsilon__";
221  else
222  is = get_c_string(car(i));
223  if (o == NIL)
224  os = "__epsilon__";
225  else
226  os = get_c_string(car(o));
227  fp = cons(strintern(is+"/"+os),fp);
228  }
229  return reverse(fp);
230 }
231 
232 static void ltsrule_compile(LISP inalpha, LISP outalpha,
233  LISP fp, LISP sets, LISP rule,
234  EST_WFST &a, EST_WFST &not_a)
235 {
236  // Return two regexs, one matching with rewrites and another
237  // that matches things this rule doesn't match.
238  LISP LC,MAP,RC,/*notMAP,*/r;
239 
240  r = analyse_rule(rule);
241  LC = siod_nth(0,r);
242  MAP = siod_nth(1,r);
243  RC = siod_nth(2,r);
244 
245  LC = expand_sets(LC,fp,sets);
246  RC = expand_sets(RC,fp,sets);
247  /*notMAP = */find_notMAP(MAP,fp);
248 
249 
250  LISP kk = cons(LC,cons(MAP,cons(RC,NIL)));
251  cout << "kk rule" << endl;
252  pprint(kk);
253  a.kkrule_compile(inalpha,outalpha,fp,kk,NIL);
254 
255  // (or (* <fp>) (not <rule>)) ;; everything except the rule
256  LISP regex_r = cons(rintern("and"),append(LC,append(MAP,RC)));
257 // LISP nn = cons(rintern("or"),
258 // cons(cons(rintern("*"),cons(cons(rintern("or"),fp),NIL)),
259 // cons(cons(rintern("not"),cons(regex_r,NIL)),
260 // NIL)));
261  LISP nn = cons(rintern("not"),cons(regex_r,NIL));
262  not_a.build_from_regex(inalpha,outalpha,nn);
263 
264 }
265 
266 static LISP analyse_rule(LISP rule)
267 {
268  // return the left context, map and right context;
269  LISP LC=NIL, RC=NIL, in=NIL, out=NIL;
270  LISP l;
271  int state=0;
272 
273  for (l=rule; l != NIL; l=cdr(l))
274  {
275  if ((state==0) && (!streq("[",get_c_string(car(l)))))
276  LC = cons(car(l),LC);
277  else if ((state==0) && (streq("[",get_c_string(car(l)))))
278  state = 1;
279  else if ((state==1) && (!streq("]",get_c_string(car(l)))))
280  in = cons(car(l),in);
281  else if ((state==1) && (streq("]",get_c_string(car(l)))))
282  state = 2;
283  else if ((state==2) && (!streq("=",get_c_string(car(l)))))
284  RC = cons(car(l),RC);
285  else if ((state==2) && (streq("=",get_c_string(car(l)))))
286  state = 3;
287  else if (state == 3)
288  {
289  out = l;
290  break;
291  }
292  }
293 
294  return cons(reverse(LC),
295  cons(make_fp(reverse(in),out),
296  cons(reverse(RC),NIL)));
297 
298 }
299 
300 static LISP expand_sets(LISP l, LISP fp, LISP sets)
301 {
302  // Expand sets in l and fix regex characters
303  LISP r,es=NIL;
304 
305  for (r=l; r != NIL; r=cdr(r))
306  {
307  LISP s = expand_set(car(r),fp,sets);
308  if (cdr(r) && (streq("*",get_c_string(car(cdr(r))))))
309  {
310  es = cons(cons(rintern("*"),s),es);
311  r=cdr(r);
312  }
313  else if (cdr(r) && (streq("+",get_c_string(car(cdr(r))))))
314  {
315  es = cons(cons(rintern("+"),s),es);
316  r=cdr(r);
317  }
318  else
319  es = cons(cons(rintern("and"),s),es);
320  }
321  return reverse(es);
322 }
323 
324 static LISP expand_set(LISP p, LISP fp, LISP sets)
325 {
326  // expand p with respect to sets and feasible pairs
327  LISP set = siod_assoc_str(get_c_string(p),sets);
328  LISP s,f;
329  LISP r=NIL;
330 
331  if (set == NIL)
332  set = cons(p,NIL);
333 
334  for (s=set; s != NIL; s=cdr(s))
335  {
336  for (f=fp; f != NIL; f=cdr(f))
337  {
338  EST_String ss = get_c_string(car(s));
339  EST_String sf = get_c_string(car(f));
340 
341  if (sf.contains(ss+"/"))
342  r = cons(car(f),r);
343  }
344  }
345 
346  return reverse(r);
347 }
348 
349 static LISP find_notMAP(LISP MAP,LISP fp)
350 {
351  // Returns REGEX that matches everything except MAP, this doesn't
352  // try all possible epsilons though
353  LISP r,notrp=NIL,m,np;
354  EST_String s,l,p,sr,lr,rr;
355 
356  for (m=MAP; m != NIL; m=cdr(m))
357  {
358  p = get_c_string(car(m));
359  if (p.contains("/"))
360  {
361  s = p.before("/");
362  l = p.after("/");
363  }
364  else
365  {
366  s = p;
367  l = p;
368  }
369 
370  for (np=NIL,r=fp; r != NIL; r = cdr(r))
371  {
372  rr = get_c_string(car(r));
373  if (rr.contains("/"))
374  {
375  sr = rr.before("/");
376  lr = rr.after("/");
377  }
378  else
379  {
380  sr = rr;
381  lr = rr;
382  }
383  if ((s == sr) && (l != lr))
384  np = cons(car(r),np);
385  }
386  notrp = cons(cons(rintern("or"),np),notrp);
387  }
388 
389  return reverse(notrp);
390 }
391 
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
Definition: EST_String.h:365
a call representing a weighted finite-state transducer
Definition: EST_WFST.h:154
#define NIL
Definition: siod_defs.h:92
int siod_llength(LISP list)
Definition: siod.cc:202
LISP strintern(const char *data)
Definition: slib_str.cc:22
LISP append(LISP l1, LISP l2)
Definition: slib_list.cc:177
void ltscompile(LISP lts_rules, EST_WFST &all_wfst)
Definition: ltscompile.cc:63
void intersection(EST_TList< EST_WFST > &wl)
Definition: wfst_ops.cc:357
#define streq(X, Y)
Definition: EST_cutils.h:57
LISP siod_assoc_str(const char *key, LISP alist)
Definition: siod.cc:125
LISP siod_nth(int nth, LISP list)
Definition: siod.cc:214
void pprint(LISP exp)
Definition: slib_file.cc:95
const char * get_c_string(LISP x)
Definition: slib.cc:638
void build_from_regex(LISP inalpha, LISP outalpha, LISP regex)
Definition: wfst_regex.cc:192
LISP cons(LISP x, LISP y)
Definition: slib_list.cc:97
void kkrule_compile(LISP inalpha, LISP outalpha, LISP fp, LISP rule, LISP sets)
Definition: kkcompile.cc:233
#define FALSE
Definition: EST_bool.h:119
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
?
Definition: EST_WFST.cc:353
void uunion(EST_TList< EST_WFST > &wl)
f
Definition: EST_item_aux.cc:48
LISP rintern(const char *name)
Definition: slib.cc:734
LISP car(LISP x)
Definition: slib_list.cc:115
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition: EST_String.h:308
EST_String before(int pos, int len=0) const
Part before position.
Definition: EST_String.h:276
LISP fp
Definition: kkcompile.cc:63
#define TRUE
Definition: EST_bool.h:118
void reverse(EST_Wave &sig)
LISP siod_member_str(const char *key, LISP list)
Definition: siod.cc:167
LISP cdr(LISP x)
Definition: slib_list.cc:124