Edinburgh Speech Tools  2.1-release
wfst_ops.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : November 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* Basic WFST operations: minimization, determinization, intersection, */
38 /* union, composition */
39 /* */
40 /*=======================================================================*/
41 #include <iostream>
42 #include <cstdlib>
43 #include "EST_WFST.h"
44 #include "wfst_aux.h"
45 #include "EST_String.h"
46 #include "EST_TList.h"
47 #include "EST_TKVL.h"
48 #include "EST_THash.h"
49 
50 using namespace std;
51 
52 Declare_TList_T(EST_WFST_MultiState *,EST_WFST_MultiStateP)
53 
54  // Declare_KVL(int, EST_IList)
55 
58 
59  static EST_IList int_EST_IList_kv_def_EST_IList_s;
60  static int int_EST_IList_kv_def_int_s;
61 
62  template <> EST_IList *EST_TKVL< int, EST_IList >::default_val=&int_EST_IList_kv_def_EST_IList_s;
63  template <> int *EST_TKVL< int, EST_IList >::default_key=&int_EST_IList_kv_def_int_s;
64 
65  Declare_TList_N(KVI_int_EST_IList_t, 0)
66 
67 
68 #if defined(INSTANTIATE_TEMPLATES)
69 #include "../base_class/EST_TList.cc"
70 
71  Instantiate_TList_T_MIN(EST_WFST_MultiState *,EST_WFST_MultiStateP)
72 
73 #include "../base_class/EST_TKVL.cc"
74 
75  // Instantiate_KVL(int, EST_IList)
76 
77  template class EST_TKVL<int, EST_IList>;
78  template class EST_TKVI<int, EST_IList>;
79 // ostream &operator<<(ostream &s, EST_TKVI< int , EST_IList > const &i){ return s << i.k << "\t" << i.v << "\n"; }
80 // ostream& operator << (ostream& s, EST_TKVL< int , EST_IList > const &l) {EST_Litem *p; for (p = l.list.head(); p ; p = p->next()) s << l.list(p).k << "\t" << l.list(p).v << endl; return s;}
81  Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer_k, int, KVL_int_EST_IList_kitt)
82  Instantiate_TStructIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
83  Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
84 
85  // Instantiate_TList(KVI_int_EST_IList_t)
86 
87  template class EST_TList< TLIST_KVI_int_EST_IList_t_VAL >;
88  template class EST_TItem< TLIST_KVI_int_EST_IList_t_VAL >;
89  template const char *error_name(EST_TList< KVI_int_EST_IList_t > val);
90 
91  Instantiate_TIterator_T( EST_TList<KVI_int_EST_IList_t>, EST_TList<KVI_int_EST_IList_t>::IPointer, KVI_int_EST_IList_t, TList_KVI_int_EST_IList_t_itt)
92 
93 
94 #endif
95 
97 
98 static enum wfst_state_type intersect_state_type(wfst_list &wl,
100 static int check_distinguished(const EST_WFST &nmwfst,
101  int p, int q,
102  wfst_marks &marks,
103  wfst_assumes &assumptions);
104 
106 {
107  // If of set type only add it if its not already there
108  EST_Litem *p;
109 
110  if (p_type == wfst_ms_set)
111  for (p=head(); p != 0; p=p->next())
112  {
113  if ((*this)(p) == i)
114  return;
115  else if (i < (*this)(p)) // keep the list ordered
116  {
117  insert_before(p,i);
118  return;
119  }
120  }
121 
122  append(i);
123 }
124 
126  EST_WFST_MultiState *ms,int proposed)
127 {
128  // Returns proposed if ms is not already in index, otherwise
129  // returns the value that was proposed when it was first new.
130 
131  // I'll have to make this more efficient in future.
132  EST_String istring("");
133  EST_Litem *p;
134  int ns,found;
135 
136  for (p=ms->head(); p != 0; p = p->next())
137  istring += itoString((*ms)(p)) + " ";
138 
139  ns = index.val(istring,found);
140  if (found)
141  return ns;
142  else
143  {
144  index.add_item(istring,proposed);
145  return proposed;
146  }
147 }
148 
149 static int pair_check(EST_THash<int,int> &pairs_done, int i, int o, int odim)
150 {
151  int p;
152  int found;
153 
154  p = (i*odim)+o; // unique number representing i/o pair
155 
156  pairs_done.val(p,found);
157  if (!found)
158  { // first time seeing this pair
159  pairs_done.add_item(p,1);
160  return 0;
161  }
162  return 1;
163 
164 }
165 
166 void EST_WFST::determinize(const EST_WFST &ndwfst)
167 {
168  // Determinise a non-deterministic WFST
169  EST_WFST_MultiState *start_state,*nms,*current;
170  int ns;
171  Agenda multistate_agenda;
173  int i,o, new_name;
174  int c=0;
175  EST_Litem *sp, *tp;
176 
177  clear();
178  p_in_symbols.copy(ndwfst.p_in_symbols);
179  p_out_symbols.copy(ndwfst.p_out_symbols);
180 
181  // Create a starting state and add it to this WFST
182  start_state = new EST_WFST_MultiState(wfst_ms_set);
183  start_state->add(ndwfst.start_state());
184  ndwfst.add_epsilon_reachable(start_state);
185 
186  p_start_state = add_state(ndwfst.ms_type(start_state));
187  start_state->set_name(p_start_state);
188 
189  multistate_agenda.append(start_state); // initialize agenda
190 
191  while (multistate_agenda.length() > 0)
192  {
193  EST_THash<int,int> pairs_done(100);
194  current = multistate_agenda.first();
195  multistate_agenda.remove(multistate_agenda.head());
196  if ((c % 100) == 99)
197  cout << "Determinizing " << summary() << " Agenda "
198  << multistate_agenda.length() << endl;
199  c++;
200 
201  for (sp=current->head(); sp != 0; sp=sp->next())
202  {
203  const EST_WFST_State *s = ndwfst.state((*current)(sp));
204  for (tp=s->transitions.head(); tp != 0; tp = tp->next())
205  {
206  i = s->transitions(tp)->in_symbol();
207  o = s->transitions(tp)->out_symbol();
208  // Need to check if i/o has already been proposed
209  if (pair_check(pairs_done,i,o,p_out_symbols.length()) == 1)
210  continue; // already prosed those
211 // for (i=0; i < p_in_symbols.length(); i++)
212 // { // start at 2 to skip any and epsilon characters -- hmm bad
213 // for (o=0; o < p_out_symbols.length(); o++)
214 // {
215  if ((i==o) && (i==0))
216  continue; // don't deal here with epsilon transitions
217  nms = apply_multistate(ndwfst,current,i,o);
218  if ((nms->length() == 0) ||
219  (ndwfst.ms_type(nms) == wfst_error))
220  {
221  delete nms;
222  continue; // no state to go to
223  }
224  new_name = multistate_index(index,nms,p_num_states);
225  if (new_name == p_num_states) // genuinely new
226  { // create a real state and add it to the agenda
227  ns = add_state(ndwfst.ms_type(nms));
228  nms->set_name(ns);
229  multistate_agenda.append(nms);
230  }
231  else
232  {
233  nms->set_name(new_name);
234  }
235 
236  // Add new transition to current state
237  p_states[current->name()]
238  ->add_transition(nms->weight(),
239  nms->name(),
240  i,o);
241  delete nms;
242  }
243  }
244  delete current;
245 
246  // Probably want some progress summary
247  }
248 
249 }
250 
253  int in, int out) const
254 {
255  // Apply in and out to ms and find all new states it becomes
256  EST_Litem *p;
258 
259  new_ms->clear();
260 
261  for (p=ms->head(); p != 0; p=p->next())
262  // Add all new possible states from ms(p) given in/out
263  wfst.transition_all((*ms)(p),in,out,new_ms);
264 
265  // Add epsilon reachable states from any states in multistates
266  wfst.add_epsilon_reachable(new_ms);
267 
268  return new_ms;
269 
270 }
271 
273 {
274  // Returns wfst_error if ms contains an error state, wfst_final
275  // if there is at least one final and wfst_non_final
276  EST_Litem *p;
278 
279  for (p=ms->head(); p != 0; p = p->next())
280  if (p_states((*ms)(p))->type() == wfst_error)
281  return wfst_error;
282  else if (p_states((*ms)(p))->type() == wfst_licence)
283  // wfst_licence states are generated in KK compilation
284  r = wfst_licence;
285  else if ((p_states((*ms)(p))->type() == wfst_final) &&
286  (r != wfst_licence))
287  r = wfst_final;
288 
289  if (r == wfst_licence)
290  return wfst_nonfinal;
291  else
292  return r;
293 }
294 
296  int in,
297  int out,
298  EST_WFST_MultiState *ms) const
299 {
300  // Find all possible new states from state given in/out
301  const EST_WFST_State *s = p_states(state);
302  EST_Litem *i;
303 
304  for (i=s->transitions.head(); i != 0; i=i->next())
305  {
306  if ((in == s->transitions(i)->in_symbol()) &&
307  (out == s->transitions(i)->out_symbol()))
308  ms->add(s->transitions(i)->state());
309  }
310 }
311 
312 static int is_a_member(const EST_IList &ii, int i)
313 {
314  for (EST_Litem *p=ii.head(); p != 0; p=p->next())
315  if (ii(p) == i)
316  return TRUE;
317  return FALSE;
318 }
319 
321 {
322  // As ms->add() adds in order we need to copy to a new list and append
323  // to it any new epsilon accessible states
324  EST_Litem *p;
325  EST_IList ii;
326  int ie = p_in_symbols.name(get_c_string(epsilon_label()));
327  int oe = p_out_symbols.name(get_c_string(epsilon_label()));
328 
329  for (p=ms->head(); p != 0; p=p->next())
330  ii.append((*ms)(p));
331 
332  for (p=ii.head(); p != 0; p=p->next())
333  {
334  const EST_WFST_State *s = p_states(ii(p));
335  EST_Litem *i;
336 
337  for (i=s->transitions.head(); i != 0; i=i->next())
338  {
339  if ((ie == s->transitions(i)->in_symbol()) &&
340  (oe == s->transitions(i)->out_symbol()))
341  {
342  // Add to end of ii if not already there
343  int nstate = s->transitions(i)->state();
344  if (!is_a_member(ii,nstate))
345  {
346  ii.append(nstate);
347  ms->add(nstate); // gets added in order
348  }
349  }
350  }
351  }
352 }
353 
354 /************************************************************************/
355 /* Intersection of a list of transducers, i.e. what is accepted by all */
356 /************************************************************************/
358 {
359  // This is very similar to determinisation, similar complexity too
361  EST_WFST_MultiState *nms,*current;
362  int ns;
363  Agenda multistate_agenda;
365  int i,o, new_name, n;
366  EST_Litem *p,*q;
367  int c=0;
368 
369  // Initialize this WFST from the given ones
370  clear();
371  p_in_symbols.copy(wl.first().p_in_symbols);
372  p_out_symbols.copy(wl.first().p_out_symbols);
373 
374  // Determinize each input WFST and make a new start state consisting of
375  // the start states of each of the input WFSTs
376  for (p=wl.tail(); p != 0; p=p->prev())
377  {
378  if (!wl(p).deterministic())
379  {
380  cout << "...intersection deterministing" << endl;
381  EST_WFST tt = wl(p);
382  wl(p).determinize(tt);
383  }
384  start_state->add(wl(p).start_state());
385  }
386 
387  p_start_state = add_state(intersect_state_type(wl,start_state));
388  // Label multistate start start with single-state name
389  start_state->set_name(p_start_state);
390 
391  multistate_agenda.append(start_state); // initialize agenda
392 
393  while (multistate_agenda.length() > 0)
394  {
395  current = multistate_agenda.first();
396  multistate_agenda.remove(multistate_agenda.head());
397  if ((c % 100) == 99)
398  cout << "Intersection " << summary() << " Agenda "
399  << multistate_agenda.length() << endl;
400  c++;
401 
402  // For all possible in/out pairs
403  for (i=0; i < p_in_symbols.length(); i++)
404  { // start at 2 to skip any and epsilon characters -- hmm bad
405  for (o=0; o < p_out_symbols.length(); o++)
406  {
407  if ((i==o) && (i==0)) // shouldn't be epsilon/epsilon here
408  continue;
410  // Increment multistate to new multistate for each individual
411  // state using each WFST
412  for (n=0,p=wl.head(),q=current->head();
413  (p != 0) && (q != 0);
414  p=p->next(),q=q->next(),n++)
415  nms->add(wl(p).transition((*current)(q),i,o));
416  if (intersect_state_type(wl,nms) == wfst_error)
417  {
418  delete nms;
419  continue; // no state to go to
420  }
421  new_name = multistate_index(index,nms,p_num_states);
422  if (new_name == p_num_states) // genuinely new and unseen
423  { // create a real state and add it to the agenda
424  ns = add_state(intersect_state_type(wl,nms));
425  nms->set_name(ns);
426  multistate_agenda.append(nms);
427  }
428  else // already seen this state, and is already named
429  {
430  nms->set_name(new_name);
431  }
432 
433  // Add new transition to current state
434  p_states[current->name()]
435  ->add_transition(nms->weight(),nms->name(),i,o);
436  delete nms;
437  }
438  }
439  delete current;
440  // Probably want some progress summary
441  }
442 
443 }
444 
445 static enum wfst_state_type intersect_state_type(wfst_list &wl,
447 {
448  // Find the state type of the combined states
449  // If any are error, return error, if one is nonfinal return nonfinal
450  // otherwise its final
451  EST_Litem *p,*q;
452  enum wfst_state_type r = wfst_final;
453 
454  for (p=wl.head(),q=ms->head();
455  (p != 0) && (q != 0);
456  p=p->next(),q=q->next())
457  {
458  if ((*ms)(q) == WFST_ERROR_STATE)
459  return wfst_error;
460 
461  enum wfst_state_type dd = wl(p).state((*ms)(q))->type();
462 
463  if (dd == wfst_error)
464  return wfst_error;
465  else if (dd == wfst_nonfinal)
466  r = wfst_nonfinal;
467  }
468  return r;
469 }
470 
471 void EST_WFST::intersection(const EST_WFST &a, const EST_WFST &b)
472 {
473  // Intersect two WFSTs
474  wfst_list wl;
475 
476  wl.append(a);
477  wl.append(b);
478 
479  intersection(wl);
480 }
481 
482 /*******************************************************************/
483 /* Minimization is of complexity of O(n^2) */
484 /*******************************************************************/
485 void EST_WFST::minimize(const EST_WFST &nmwfst)
486 {
487  // Minimize a WFST
488  int p,q;
489  wfst_marks marks(nmwfst.num_states());
490  wfst_assumes assumptions;
491 
492  // For each combination of states
493  for (p=0; p < nmwfst.num_states()-1; p++)
494  for (q=p+1; q < nmwfst.num_states(); q++)
495  check_distinguished(nmwfst,p,q,marks,assumptions);
496 
497  // The marked array now has all different and lists to equivalent states
498  // Build an array mapping old name to new name.
499  int num_new_states;
500  int i;
501  EST_IVector state_map;
502 
503  marks.find_state_map(state_map,num_new_states);
504 
505  // Build the new minimized WFST mapping existing transitions
506  clear();
507  p_in_symbols.copy(nmwfst.p_in_symbols);
508  p_out_symbols.copy(nmwfst.p_out_symbols);
509 
510  init(num_new_states);
511  p_start_state = state_map(nmwfst.p_start_state);
512 
513  for (i=0; i < nmwfst.num_states(); i++)
514  {
515  if (p_states[state_map(i)] == 0)
516  p_states[state_map(i)] =
517  copy_and_map_states(state_map,nmwfst.state(i),nmwfst);
518  }
519 
520 }
521 
522 static int check_distinguished(const EST_WFST &nmwfst,
523  int p, int q,
524  wfst_marks &marks,
525  wfst_assumes &assumptions)
526 {
527  // Check to see if these two are equivalent
528  EST_Litem *t;
529  EST_IList from_p,from_q;
530 
531  if (marks.distinguished(p,q)) // been here, done that
532  return TRUE;
533  else if (marks.undistinguished(p,q)) // been here too
534  return FALSE;
535  // Not been here yet so do some work to try to find out if
536  // these states can be distinguished
537  else if ((nmwfst.state(p)->type() != nmwfst.state(q)->type()) ||
538  (nmwfst.state(p)->num_transitions() !=
539  nmwfst.state(q)->num_transitions()))
540  { // Different final/non-final type or different number
541  // of transitions so obviously different states
542  marks.distinguish(p,q);
543  return TRUE;
544  }
545  else
546  { // Have to check their transitions individually
547  for (t=nmwfst.state(p)->transitions.head(); t != 0; t=t->next())
548  {
549  int in = nmwfst.state(p)->transitions(t)->in_symbol();
550  int out = nmwfst.state(p)->transitions(t)->out_symbol();
551  int y = nmwfst.state(p)->transitions(t)->state();
552  int z = nmwfst.transition(q,in,out);
553  if ((z == WFST_ERROR_STATE) ||
554  (marks.distinguished(y,z)))
555  { // no equiv transition or obviously different states
556  marks.distinguish(p,q);
557  return TRUE;
558  }
559  else if (equivalent_to(y,z,assumptions))
560  continue;
561  else // Potential equivalence, record y,z for later
562  {
563  from_p.append(y);
564  from_q.append(z);
565  }
566  }
567  // All transitions had potential match so only now
568  // actually check their follow sets
569  EST_Litem *yp, *zp;
570  int tl = FALSE;
571  if (assumptions.length() == 0)
572  tl = TRUE;
573  // assume they are undistinguished
574  add_assumption(p,q,assumptions);
575  for (yp=from_p.head(),zp=from_q.head();
576  yp != 0; yp=yp->next(),zp=zp->next())
577  if (check_distinguished(nmwfst,from_p(yp),from_q(zp),
578  marks,
579  assumptions))
580  {
581  marks.distinguish(p,q); // set the distinguished
582  assumptions.clear();
583  return TRUE;
584  }
585  // ok I give up, they are the same
586  if (tl)
587  { // This is equivalent given the assumptions (and no
588  // higher level assumptions) so we can mark these states
589  // as undistinguished (and all the assumptions)
590  mark_undistinguished(marks,assumptions);
591  assumptions.clear();
592  }
593  return FALSE;
594  }
595 }
596 
597 void EST_WFST::extend_alphabets(const EST_WFST &b)
598 {
599  // Extend current in/out alphabets to accommodate anything in b's
600  // that are not in a's
601  // This guarantees that the number in this will still be valid
602  EST_StrList ivocab, ovocab;
603  int i;
604 
605  for (i=0; i<p_in_symbols.length(); i++)
606  ivocab.append(in_symbol(i));
607  for (i=0; i<b.p_in_symbols.length(); i++)
608  if (!strlist_member(ivocab,b.in_symbol(i)))
609  ivocab.append(b.in_symbol(i));
610  for (i=0; i<p_out_symbols.length(); i++)
611  ovocab.append(out_symbol(i));
612  for (i=0; i<b.p_out_symbols.length(); i++)
613  if (!strlist_member(ovocab,b.out_symbol(i)))
614  ovocab.append(b.out_symbol(i));
615 
616  p_in_symbols.init(ivocab);
617  p_out_symbols.init(ovocab);
618 }
619 
620 EST_WFST_State *EST_WFST::copy_and_map_states(const EST_IVector &state_map,
621  const EST_WFST_State *s,
622  const EST_WFST &b) const
623 {
624  // Copy s into new state mapping new states to those in state map
625  EST_WFST_State *ns = new EST_WFST_State(state_map(s->name()));
626  EST_Litem *i;
627 
628  ns->set_type(s->type());
629 
630  for (i=s->transitions.head(); i != 0; i=i->next())
631  {
632  int mapped_state= state_map(s->transitions(i)->state());
633  if (mapped_state != WFST_ERROR_STATE)
634  ns->add_transition(s->transitions(i)->weight(),
635  mapped_state,
636  in_symbol(b.in_symbol(s->transitions(i)->in_symbol())),
637  out_symbol(b.out_symbol(s->transitions(i)->out_symbol())));
638  }
639 
640  return ns;
641 }
642 
643 /***********************************************************************/
644 /* Build a WFST which doesn't accept any of the strings that a accepts */
645 /* This keeps the same in/out alphabet */
646 /***********************************************************************/
648 {
649  int i;
650 
651  copy(a);
652 
653  for (i=0; i < num_states(); i++)
654  {
655  if (p_states[i]->type() == wfst_final)
656  p_states[i]->set_type(wfst_nonfinal);
657  else if (p_states[i]->type() == wfst_nonfinal)
658  p_states[i]->set_type(wfst_final);
659  // errors remain errors
660  }
661 }
662 
663 static int noloopstostart(const EST_WFST &a)
664 {
665  // TRUE if there are no transitions leading to the start state
666  // when this is true there is a union operation which preserves
667  // deterministicness
668  int i;
669 
670  for (i=0; i < a.num_states(); i++)
671  {
672  const EST_WFST_State *s = a.state(i);
673  EST_Litem *p;
674  for (p=s->transitions.head(); p != 0; p=p->next())
675  {
676  if (s->transitions(p)->state() == a.start_state())
677  return FALSE;
678  }
679  }
680  return TRUE;
681 }
682 
683 int EST_WFST::deterministiconstartstates(const EST_WFST &a, const EST_WFST &b) const
684 {
685  // TRUE if there are no common transition labels from a and b's
686  // start state
687  EST_IMatrix tab;
688  int in,out;
689 
690  tab.resize(a.p_in_symbols.length(),a.p_out_symbols.length());
691  tab.fill(0);
692 
693  for (EST_Litem *t=a.state(a.start_state())->transitions.head();
694  t != 0; t=t->next())
695  {
696  tab(a.state(a.start_state())->transitions(t)->in_symbol(),
697  a.state(a.start_state())->transitions(t)->out_symbol()) = 1;
698  }
699 
700  for (EST_Litem *tt=b.state(b.start_state())->transitions.head();
701  tt != 0; tt=tt->next())
702  {
703  in = a.in_symbol(b.in_symbol(b.state(b.start_state())->transitions(tt)->in_symbol()));
704  out = a.out_symbol(b.out_symbol(b.state(b.start_state())->transitions(tt)->out_symbol()));
705  if (in == -1)
706  continue; // obviously not a clash
707  else if (out == -1)
708  continue; // obviously not a clash
709  else if (tab(in,out) == 1)
710  return FALSE;
711  }
712  return TRUE;
713 }
714 
715 /***********************************************************************/
716 /* Build a WFST which accepts both strings of a and of b */
717 /***********************************************************************/
718 void EST_WFST::uunion(const EST_WFST &a,const EST_WFST &b)
719 {
720  EST_IVector smap;
721  int i;
722 
723  copy(a);
724  extend_alphabets(b);
725 
726  if (a.deterministic() && b.deterministic() &&
727  noloopstostart(a) && noloopstostart(b) &&
728  deterministiconstartstates(a,b))
729  {
730  // This does the union without the epsilon and will preserve
731  // deterministic wfsts in this special case
732  smap.resize(b.num_states());
733  smap[0] = start_state();
734  for (i=1; i < b.num_states(); ++i)
735  smap[i] = i+a.num_states()-1;
736 
737  more_states(a.num_states()+b.num_states()-1);
738  p_num_states += b.num_states()-1;
739  for (i=1; i < b.num_states(); i++)
740  p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);
741 
742  const EST_WFST_State *s = b.state(b.start_state());
743  EST_Litem *p;
744  for (p=s->transitions.head(); p != 0; p=p->next())
745  {
746  int mapped_state= smap(s->transitions(p)->state());
747  if (mapped_state != WFST_ERROR_STATE)
748  p_states[start_state()]->
749  add_transition(s->transitions(p)->weight(),
750  mapped_state,
751  in_symbol(b.in_symbol(s->transitions(p)->in_symbol())),
752  out_symbol(b.out_symbol(s->transitions(p)->out_symbol())));
753  }
754  }
755  else
756  { // do it the hard way
757  smap.resize(b.num_states());
758  for (i=0; i < b.num_states(); ++i)
759  smap[i] = i+a.num_states();
760 
761  more_states(a.num_states()+b.num_states());
762  p_num_states += b.num_states();
763  for (i=0; i < b.num_states(); i++)
764  p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);
765 
766  // Actually do the union bit by adding an epsilon transition from
767  // the start of a to start state of b
768  p_states[start_state()]->add_transition(0.0,smap[b.start_state()],
769  in_epsilon(), out_epsilon());
770  }
771 
772 }
773 
774 /***********************************************************************/
775 /* Build a WFST which a followed by b */
776 /***********************************************************************/
777 void EST_WFST::concat(const EST_WFST &a,const EST_WFST &b)
778 {
779  EST_IVector smap;
780  int i;
781 
782  copy(a);
783  extend_alphabets(b);
784 
785  smap.resize(b.num_states());
786  for (i=0; i < b.num_states(); ++i)
787  smap[i] = i+a.num_states();
788 
789  more_states(a.num_states()+b.num_states());
790 
791  // everything final in a becomes non final and an epsilon transition
792  // goes from them to the start of b
793  for (i=0; i < num_states(); i++)
794  {
795  if (p_states[i]->type() == wfst_final)
796  {
797  p_states[i]->set_type(wfst_nonfinal);
798  p_states[i]->add_transition(0.0,smap[b.start_state()],
799  in_epsilon(), out_epsilon());
800  }
801  }
802 
803  p_num_states += b.num_states();
804  for (i=0; i < b.num_states(); i++)
805  p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);
806 
807 }
808 
809 /***********************************************************************/
810 /* Build a WFST from composing a and b (feeding output of a to */
811 /* input of b) */
812 /***********************************************************************/
813 void EST_WFST::compose(const EST_WFST &a,const EST_WFST &b)
814 {
816  EST_WFST_MultiState *nms,*current;
817  Agenda multistate_agenda;
819  wfst_list wl;
820  EST_WFST t;
821  int i,new_name;
822  EST_Litem *p,*q;
823 
824  clear();
825  p_in_symbols.copy(a.p_in_symbols);
826  p_out_symbols.copy(b.p_out_symbols);
827 
828  // Unfortunately need to needlessly copy a and b here
829  wl.append(a);
830  start_state->add(a.start_state());
831  wl.append(b);
832  start_state->add(b.start_state());
833 
834  p_start_state = add_state(intersect_state_type(wl,start_state));
835  // Label multistate start start with single-state name
836  start_state->set_name(p_start_state);
837 
838  multistate_agenda.append(start_state); // initialize agenda
839 
840  while (multistate_agenda.length() > 0)
841  {
842  current = multistate_agenda.first();
843  multistate_agenda.remove(multistate_agenda.head());
844 
845  // For all possible in/out pairs
846  for (i=0; i < p_in_symbols.length(); i++)
847  { // start at 2 to skip any and epsilon characters -- hmm bad
848  // find transitions
849  wfst_translist transa;
850 
851  wl.first().transduce(current->first(),i,transa);
852  for (p=transa.head(); p != 0; p=p->next())
853  {
854  wfst_translist transb;
855  // feed a's out to b'is in
856  wl.last().transduce(
857  current->last(),
858  b.in_symbol(a.out_symbol(transa(p)->out_symbol())),
859  transb);
860  for (q = transb.head(); q != 0; q=q->next())
861  {
863  nms->add(transa(p)->state());
864  nms->add(transb(q)->state());
865 
866  if (intersect_state_type(wl,nms) == wfst_error)
867  {
868  delete nms;
869  continue; // no state to go to
870  }
871  new_name = multistate_index(index,nms,p_num_states);
872  if (new_name == p_num_states) // genuinely new and unseen
873  { // create a real state and add it to the agenda
874  int ns = add_state(intersect_state_type(wl,nms));
875  nms->set_name(ns);
876  multistate_agenda.append(nms);
877  }
878  else // already seen this state, and is already named
879  nms->set_name(new_name);
880 
881  // Add new transition to current state
882  p_states[current->name()]
883  ->add_transition(nms->weight(),nms->name(),
884  i,transb(q)->out_symbol());
885 
886  }
887 
888  }
889  }
890  delete current;
891  // Probably want some progress summary
892  }
893 
894 }
895 
896 /***********************************************************************/
897 /* Build a WFST which accepts strings in a but not in b */
898 /***********************************************************************/
899 void EST_WFST::difference(const EST_WFST &a,const EST_WFST &b)
900 {
901  EST_WFST compb;
902 
903  // This is sort of a complement, but not quite
904  // But what would the name of this operation be?
905  compb.copy(b);
906  for (int i=0; i < compb.num_states(); i++)
907  {
908  if (compb.p_states[i]->type() == wfst_final)
909  compb.p_states[i]->set_type(wfst_error);
910  }
911 
912  uunion(a,compb);
913 }
914 
915 /***********************************************************************/
916 /* Remove states from which a final state can't be reached */
917 /***********************************************************************/
919 {
920  // Find all states which are reachable from the start state, and
921  // can reach a final state. Mark all others as error states
922  wfst_list wl;
923 
924  wl.append(a);
925  EST_WFST &ab = wl.first();
926 
927  ab.current_tag = ++traverse_tag;
928  for (int i=0; i < ab.p_num_states; i++)
929  ab.can_reach_final(i);
930  // This will copy only the non-error states
931  intersection(wl);
932 
933 }
934 
935 int EST_WFST::can_reach_final(int state)
936 {
937  // Return TRUE iff this state is final or can reach a final state
938 
939  if (p_states[state]->type() == wfst_final)
940  return TRUE;
941  else if (p_states[state]->type() == wfst_error)
942  return FALSE;
943  else if (p_states[state]->tag() == current_tag)
944  // Been here and it is reachable
945  return TRUE;
946  else
947  {
948  EST_Litem *i;
949  enum wfst_state_type current_val = p_states[state]->type();
950  enum wfst_state_type r = wfst_error;
951  // temporarily set this to error to stop infinite recursion
952  p_states[state]->set_type(wfst_error);
953 
954  for (i=p_states[state]->transitions.head(); i != 0; i=i->next())
955  // if any transition goes to something that reaches a final state
956  // set the value back to its original
957  if (can_reach_final(p_states[state]->transitions(i)->state()))
958  r = current_val;
959 
960  // Will be set back to original value iff a final state
961  // is reachable from here
962  p_states[state]->set_type(r);
963  if (r == wfst_error)
964  return FALSE;
965  else
966  {
967  p_states[state]->set_tag(current_tag);
968  return TRUE;
969  }
970  }
971 }
972 
973 /***********************************************************************/
974 /* True is wfst is deterministic */
975 /***********************************************************************/
977 {
978  // True if all states contains no multiple arcs with the same symbol
979  EST_IMatrix tab;
980 
981  tab.resize(p_in_symbols.length(),p_out_symbols.length());
982 
983  for (int i=0; i < p_num_states; i++)
984  {
985  tab.fill(0);
986  for (EST_Litem *t=state(i)->transitions.head(); t != 0; t=t->next())
987  {
988  if (tab(state(i)->transitions(t)->in_symbol(),
989  state(i)->transitions(t)->out_symbol()) == 1)
990  return FALSE;
991  else
992  tab(state(i)->transitions(t)->in_symbol(),
993  state(i)->transitions(t)->out_symbol()) = 1;
994  }
995  }
996  return TRUE;
997 }
void clear()
Empty list.
Definition: EST_TKVL.cc:50
void minimize(const EST_WFST &a)
Build minimized form of a.
Definition: wfst_ops.cc:485
void complement(const EST_WFST &a)
Build complement of a.
Definition: wfst_ops.cc:647
void add_assumption(int y, int z, wfst_assumes &assumptions)
Definition: wfst_aux.cc:95
int transition(int state, int in, int out) const
Find (first) new state given in and out symbols.
Definition: EST_WFST.cc:264
int start_state() const
Definition: EST_WFST.h:206
int deterministic() const
True if WFST is deterministic.
Definition: wfst_ops.cc:976
EST_UItem * prev()
Definition: EST_UList.h:56
a call representing a weighted finite-state transducer
Definition: EST_WFST.h:154
int equivalent_to(int y, int z, wfst_assumes &assumptions)
Definition: wfst_aux.cc:131
int multistate_index(EST_WFST_MultiStateIndex &index, EST_WFST_MultiState *ms, int proposed)
Definition: wfst_ops.cc:125
#define Instantiate_TStructIterator_T(CONTAINER, IP, ENTRY, TAG)
const T & last() const
return const reference to last item in list
Definition: EST_TList.h:155
void add(int i)
Definition: wfst_ops.cc:105
LISP append(LISP l1, LISP l2)
Definition: slib_list.cc:177
int length(void) const
The number of members in the discrete.
EST_String itoString(int n)
Make a EST_String object from an integer.
Definition: util_io.cc:141
void transition_all(int state, int in, int out, EST_WFST_MultiState *ms) const
Find all possible transitions for given state/input/output.
Definition: wfst_ops.cc:295
void intersection(EST_TList< EST_WFST > &wl)
Definition: wfst_ops.cc:357
A specialised hash table for when the key is an EST_String.
Definition: EST_THash.h:304
#define WFST_ERROR_STATE
Definition: EST_WFST.h:52
void determinize(const EST_WFST &a)
Build determinized form of a.
Definition: wfst_ops.cc:166
int in_symbol(const EST_String &s) const
Map input symbol to input alphabet index.
Definition: EST_WFST.h:208
int index(EST_TList< T > &l, T &val, bool(*eq)(const EST_UItem *, const EST_UItem *)=NULL)
Definition: EST_TList.h:286
EST_UItem * next()
Definition: EST_UList.h:55
int undistinguished(int p, int q)
Definition: wfst_aux.h:63
Declare_TList_T(EST_WFST_MultiState *, EST_WFST_MultiStateP) typedef EST_TKVI< int
an internal class to EST_WFST used in holding multi-states when determinizing and find the intersecti...
Definition: EST_WFST.h:130
void fill(const T &v)
fill matrix with value v
Definition: EST_TMatrix.cc:314
const char * get_c_string(LISP x)
Definition: slib.cc:638
EST_String error_name(const EST_Features &a)
Templated Key-Value Item. Serves as the items in the list of the EST_TKVL class.
Definition: EST_TKVL.h:55
float weight() const
Definition: EST_WFST.h:142
wfst_state_type
Definition: EST_WFST.h:85
an internal class for EST_WFST used to represent a state in a WFST
Definition: EST_WFST.h:98
V & val(const K &key, int &found) const
Definition: EST_THash.cc:114
int out_symbol(const EST_String &s) const
Map output symbol to output alphabet index.
Definition: EST_WFST.h:214
enum wfst_state_type type() const
Definition: EST_WFST.h:116
void compose(const EST_WFST &a, const EST_WFST &b)
Definition: wfst_ops.cc:813
const EST_WFST_State * state(int i) const
Return internal state information.
Definition: EST_WFST.h:226
EST_WFST_Transition * add_transition(float w, int end, int in, int out)
Definition: EST_WFST.cc:101
#define FALSE
Definition: EST_bool.h:119
void concat(const EST_WFST &a, const EST_WFST &b)
Definition: wfst_ops.cc:777
EST_TVector< T > & copy(EST_TVector< T > a, const EST_TList< T > &in)
void uunion(EST_TList< EST_WFST > &wl)
Templated Key-Value list. Objects of type EST_TKVL contain lists which are accessed by a key of type ...
Definition: EST_TKVL.h:73
const T & first() const
return const reference to first item in list
Definition: EST_TList.h:152
#define Instantiate_TIterator_T(CONTAINER, IP, ENTRY, TAG)
void set_type(wfst_state_type t)
Definition: EST_WFST.h:117
EST_IList KVI_int_EST_IList_t
Definition: wfst_ops.cc:56
int distinguished(int p, int q)
Definition: wfst_aux.h:62
void distinguish(int p, int q)
Definition: wfst_aux.h:65
void add_epsilon_reachable(EST_WFST_MultiState *ms) const
Extend multi-state with epsilon reachable states.
Definition: wfst_ops.cc:320
void mark_undistinguished(wfst_marks &marks, wfst_assumes &assumptions)
Definition: wfst_aux.cc:161
EST_UItem * tail() const
Definition: EST_UList.h:99
Declare_TList_N(KVI_int_EST_IList_t, 0) typedef EST_TList< EST_WFST_MultiState * > Agenda
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
Definition: EST_THash.cc:167
int length() const
Definition: EST_UList.cc:57
#define Instantiate_TList_T_MIN(TYPE, TAG)
Definition: EST_TListI.h:52
int strlist_member(const EST_StrList &l, const EST_String &s)
Return true if s is in list l.
void set_name(int i)
Definition: EST_WFST.h:141
int num_states() const
Definition: EST_WFST.h:205
enum wfst_state_type ms_type(EST_WFST_MultiState *ms) const
Given a multi-state return type (final, ok, error)
Definition: wfst_ops.cc:272
An open hash table. The number of buckets should be set to allow enough space that there are relative...
Definition: EST_THash.h:69
void remove_error_states(const EST_WFST &a)
Remove error states from the WFST.
Definition: wfst_ops.cc:918
int name() const
Definition: EST_WFST.h:140
EST_UItem * head() const
Definition: EST_UList.h:97
EST_WFST_MultiState * apply_multistate(const EST_WFST &wfst, EST_WFST_MultiState *ms, int in, int out) const
Transduce a multi-state given n and out.
Definition: wfst_ops.cc:251
int length() const
number of key value pairs in list
Definition: EST_TKVL.h:97
void resize(int rows, int cols, int set=1)
resize matrix
int name() const
Definition: EST_WFST.h:114
#define TRUE
Definition: EST_bool.h:118
void difference(const EST_WFST &a, const EST_WFST &b)
Definition: wfst_ops.cc:899
void copy(const EST_WFST &wfst)
Copy from existing wfst.
Definition: EST_WFST.cc:136
wfst_translist transitions
Definition: EST_WFST.h:104
void clear(void)
remove all items in list
Definition: EST_TList.h:244
void resize(int n, int set=1)
resize vector
int num_transitions() const
Definition: EST_WFST.h:115