Edinburgh Speech Tools  2.1-release
EST_WFST.h
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : November 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* Weighted Finite State Transducers */
38 /* */
39 /*=======================================================================*/
40 #ifndef __EST_WFST_H__
41 #define __EST_WFST_H__
42 
43 #include "EST_simplestats.h"
44 #include "EST_rw_status.h"
45 #include "EST_Option.h"
46 #include "EST_TList.h"
47 #include "EST_TVector.h"
48 #include "EST_THash.h"
49 #include "siod.h"
50 #define wfst_error_msg(WMESS) (std::cerr << WMESS << std::endl,siod_error())
51 
52 #define WFST_ERROR_STATE -1
53 
54 class EST_WFST_State;
55 class EST_WFST;
56 
57 /** \class EST_WFST_Transition
58  * \brief an internal class for \ref EST_WFST for representing transitions
59  in an WFST
60  */
62  private:
63  float p_weight;
64  int p_state;
65  int p_in_symbol;
66  int p_out_symbol;
67  public:
70  { p_weight=t.p_weight; p_state=t.p_state;
71  p_in_symbol = t.p_in_symbol; p_out_symbol=t.p_out_symbol; }
72  EST_WFST_Transition(float w, int s, int i, int o)
73  { p_weight=w; p_state=s; p_in_symbol=i; p_out_symbol=o;}
75  float weight() const { return p_weight; }
76  int state() const { return p_state; }
77  int in_symbol() const { return p_in_symbol; }
78  int out_symbol() const { return p_out_symbol; }
79  void set_weight(float f) { p_weight = f; }
80  void set_state(int s) { p_state = s; }
81 
82 };
84 
86 /** I'd like to use the enums but I need to binary read/write them **/
87 /** I don't believe that's portable so we need to have ints for these **/
88 #define WFST_FINAL 0
89 #define WFST_NONFINAL 1
90 #define WFST_ERROR 2
91 #define WFST_LICENCE 3
92 
93 
94 /** \class EST_WFST_State
95  * \brief an internal class for \ref EST_WFST used to represent a
96  state in a WFST
97 */
99  private:
100  int p_name;
101  enum wfst_state_type p_type;
102  int p_tag; // for marking in traversing
103  public:
105 
106  EST_WFST_State(int name);
108  ~EST_WFST_State();
109 
110  EST_WFST_Transition *add_transition(float w,
111  int end,
112  int in,
113  int out);
114  int name() const { return p_name; }
115  int num_transitions() const { return transitions.length(); }
116  enum wfst_state_type type() const { return p_type; }
117  void set_type(wfst_state_type t) { p_type = t; }
118  void set_tag(int v) { p_tag = v;}
119  int tag() const { return p_tag;}
120 };
122 
125 
126 /** \class EST_WFST_MultiState
127  * \brief an internal class to \ref EST_WFST used in holding multi-states
128  when determinizing and find the intersections of other WFSTs.
129  */
131  private:
132  int p_name;
133  float p_weight;
134  enum wfst_mstate_type p_type;
135  public:
137  { p_name = -1; p_weight = 0.0; p_type = wfst_ms_set; }
139  { p_name = -1; p_weight = 0.0; p_type = ty; }
140  int name() const { return p_name; }
141  void set_name(int i) { p_name = i; }
142  float weight() const { return p_weight; }
143  void set_weight(float w) { p_weight = w; }
144  void set_type(enum wfst_mstate_type s) { p_type = s; }
145  enum wfst_mstate_type type() const { return p_type; }
146  void add(int i);
147 };
148 
150 
151 /** \class EST_WFST
152  * \brief a call representing a weighted finite-state transducer
153  */
154 class EST_WFST {
155  private:
156  EST_Discrete p_in_symbols;
157  EST_Discrete p_out_symbols;
158  int p_start_state;
159  int current_tag;
160  int p_num_states;
161  int p_cumulate;
162  wfst_state_vector p_states;
163 
164  int operator_and(LISP l);
165  int operator_or(LISP l);
166  int operator_star(LISP l);
167  int operator_plus(LISP l);
168  int operator_optional(LISP l);
169  int operator_not(LISP l);
170  int terminal(LISP l);
171  EST_WFST_State *copy_and_map_states(const EST_IVector &state_map,
172  const EST_WFST_State *s,
173  const EST_WFST &b) const;
174  void extend_alphabets(const EST_WFST &b);
175  int deterministiconstartstates(const EST_WFST &a, const EST_WFST &b) const;
176  EST_read_status load_transitions_from_lisp(int s, LISP trans);
177  void more_states(int new_max);
178 
179  int can_reach_final(int state);
180  static int traverse_tag;
181  public:
182  /**@name Constructor and initialisation functions */
183  ///@{
184  /// ?
185  EST_WFST();
186  /// ?
187  EST_WFST(const EST_WFST &wfst) { p_num_states = 0; copy(wfst); }
188  ~EST_WFST();
189  ///@}
190 
191  /**@name Reseting functions */
192  ///@{
193  /// Clear with (estimation of number of states required)
194  void init(int init_num_states=10);
195  /// clear an initialise with given input and out alphabets
196  void init(LISP in, LISP out);
197  /// Copy from existing wfst
198  void copy(const EST_WFST &wfst);
199  /// clear removing existing states if any
200  void clear();
201  ///@}
202 
203  /**@name General utility functions */
204  ///@{
205  int num_states() const { return p_num_states; }
206  int start_state() const { return p_start_state; }
207  /// Map input symbol to input alphabet index
208  int in_symbol(const EST_String &s) const
209  { return p_in_symbols.name(s); }
210  /// Map input alphabet index to input symbol
211  const EST_String &in_symbol(int i) const
212  { return p_in_symbols.name(i); }
213  /// Map output symbol to output alphabet index
214  int out_symbol(const EST_String &s) const
215  { return p_out_symbols.name(s); }
216  /// Map output alphabet index to output symbol
217  const EST_String &out_symbol(int i) const
218  { return p_out_symbols.name(i); }
219  /// LISP for on epsilon symbols
220  LISP epsilon_label() const { return rintern("__epsilon__"); }
221  /// Internal index for input epsilon
222  int in_epsilon() const { return p_in_symbols.name("__epsilon__"); }
223  /// Internal index for output epsilon
224  int out_epsilon() const { return p_out_symbols.name("__epsilon__"); }
225  /// Return internal state information
226  const EST_WFST_State *state(int i) const { return p_states(i); }
227  /// Return internal state information (non-const)
228  EST_WFST_State *state_non_const(int i) { return p_states(i); }
229  /// True if state `i` is final
230  int final(int i) const
231  { return ((i != WFST_ERROR_STATE) && (state(i)->type() == wfst_final));}
232  /// Accessing the input alphabet
233  const EST_Discrete &in_symbols() const { return p_in_symbols; }
234  /// Accessing the output alphabet
235  const EST_Discrete &out_symbols() const { return p_out_symbols; }
236 
237  ///@}
238 
239  /**@name file i/o */
240  ///@{
241  /// ?
242  EST_write_status save(const EST_String &filename,
243  const EST_String type = "ascii");
244  EST_write_status save_binary(FILE *fd);
245  /// ?
246  EST_read_status load(const EST_String &filename);
247 
248  EST_read_status load_binary(FILE *fd,
249  EST_Option &hinfo,
250  int num_states,
251  int swap);
252  ///@}
253 
254  /**@name transduction functions */
255  ///@{
256  /// Find (first) new state given in and out symbols
257  int transition(int state,int in, int out) const;
258  int transition(int state,int in, int out, float &prob) const;
259  /// Find (first) transition given in and out symbols
260  EST_WFST_Transition *find_transition(int state,int in, int out) const;
261  /// Find (first) new state given in and out strings
262  int transition(int state,const EST_String &in,const EST_String &out) const;
263  /// Find (first) new state given in/out string
264  int transition(int state,const EST_String &inout) const;
265  /// Transduce in to out from state
266  int transduce(int state,int in,int &out) const;
267  /// Transduce in to out (strings) from state
268  int transduce(int state,const EST_String &in,EST_String &out) const;
269  /// Transduce in to list of transitions
270  void transduce(int state,int in,wfst_translist &out) const;
271  /// Find all possible transitions for given state/input/output
272  void transition_all(int state,int in, int out,
273  EST_WFST_MultiState *ms) const;
274 
275  ///@}
276 
277  /**@name Cumulation functions for adding collective probabilities
278  for transitions from data */
279  ///@{
280  /// Cumulation condition
281  int cumulate() const {return p_cumulate;}
282  /// Clear and start cumulation
283  void start_cumulate();
284  /// Stop cumulation and calculate probabilities on transitions
285  void stop_cumulate();
286  ///@}
287 
288  /**@name WFST construction functions from external representations **/
289  ///@{
290  /// Add a new state, returns new name
291  int add_state(enum wfst_state_type state_type);
292  /// Given a multi-state return type (final, ok, error)
293  enum wfst_state_type ms_type(EST_WFST_MultiState *ms) const;
294 
295  /// Basic regex constructor
296  void build_wfst(int start, int end,LISP regex);
297  /// Basic conjunction constructor
298  void build_and_transition(int start, int end, LISP conjunctions);
299  /// Basic disjunction constructor
300  void build_or_transition(int start, int end, LISP disjunctions);
301 
302  // from standard REGEX
303  void build_from_regex(LISP inalpha, LISP outalpha, LISP regex);
304  // Kay/Kaplan/Koskenniemi rule compile
305  void kkrule_compile(LISP inalpha, LISP outalpha, LISP fp,
306  LISP rule, LISP sets);
307  // Build from regular (or pseudo-CF) grammar
308  void build_from_rg(LISP inalpha, LISP outalpha,
309  LISP distinguished, LISP rewrites,
310  LISP sets, LISP terms,
311  int max_depth);
312  // Build simple tree lexicon
313  void build_tree_lex(LISP inalpha, LISP outalpha,
314  LISP wlist);
315  ///@}
316 
317  /**@name Basic WFST operators */
318  ///@{
319  /// Build determinized form of a
320  void determinize(const EST_WFST &a);
321  /// Build minimized form of a
322  void minimize(const EST_WFST &a);
323  /// Build complement of a
324  void complement(const EST_WFST &a);
325  /** Build intersection of all WFSTs in given list. The new WFST
326  recognizes the only the strings that are recognized by all WFSTs
327  in the given list */
328  void intersection(EST_TList<EST_WFST> &wl);
329  /** Build intersection of WFSTs a and b The new WFST
330  recognizes the only the strings that are recognized by both a
331  and b list */
332  void intersection(const EST_WFST &a, const EST_WFST &b);
333  /** Build union of all WFSTs in given list. The new WFST
334  recognizes the only the strings that are recognized by at least
335  one WFSTs in the given list */
336  void uunion(EST_TList<EST_WFST> &wl);
337  /** Build union of WFSTs a and b. The new WFST
338  recognizes the only the strings that are recognized by either
339  a or b */
340  void uunion(const EST_WFST &a, const EST_WFST &b);
341  /** Build new WFST by composition of a and b. Outputs of a are
342  fed to b, given a new WFSTs which has a's input language and b's
343  output set. a's output and b's input alphabets must be the same */
344  void compose(const EST_WFST &a,const EST_WFST &b);
345  /** Build WFST that accepts only strings in a that aren't also accepted
346  by strings in b. */
347  void difference(const EST_WFST &a,const EST_WFST &b);
348  /** Build WFST that accepts a language that consists of any string in
349  a followed by any string in b **/
350  void concat(const EST_WFST &a,const EST_WFST &b);
351  ///@}
352 
353  /**@name construction support functions */
354  ///@{
355  /// True if WFST is deterministic
356  int deterministic() const;
357  /// Transduce a multi-state given n and out
358  EST_WFST_MultiState *apply_multistate(const EST_WFST &wfst,
360  int in, int out) const;
361  /// Extend multi-state with epsilon reachable states
362  void add_epsilon_reachable(EST_WFST_MultiState *ms) const;
363  /// Remove error states from the WFST.
364  void remove_error_states(const EST_WFST &a);
365 
366  EST_String summary() const;
367  /// ?
368  EST_WFST & operator = (const EST_WFST &a) { copy(a); return *this; }
369  ///@}
370 };
372 
373 // The basic operations on WFST
374 void minimize(EST_WFST &a,EST_WFST &b);
375 void determinize(EST_WFST &a,EST_WFST &b);
376 void concat(EST_WFST &a,EST_WFST &b,EST_WFST &c);
377 void compose(EST_WFST &a,EST_WFST &b,EST_WFST &c);
378 void intersect(wfst_list &wl,EST_WFST &wfst);
379 void complement(EST_WFST &a,EST_WFST &b);
380 void difference(EST_WFST &a,EST_WFST &b,EST_WFST &c);
381 void concatenate(EST_WFST &a,EST_WFST &b,EST_WFST &c);
382 
383 // Compile a set of KK rules
384 void kkcompile(LISP ruleset, EST_WFST &all_wfst);
385 // Compile a set of LTS rules
386 void ltscompile(LISP lts_rules, EST_WFST &all_wfst);
387 // Compile a regular grammar
388 void rgcompile(LISP rg, EST_WFST &all_wfst);
389 // Compile a tree lexicon
390 void tlcompile(LISP rg, EST_WFST &all_wfst);
391 
392 // Transduction and recognition functions
393 int transduce(const EST_WFST &wfst,const EST_StrList &in,EST_StrList &out);
394 int transduce(const EST_WFST &wfst,const EST_IList &in,EST_IList &out);
395 int recognize(const EST_WFST &wfst,const EST_StrList &in,int quiet);
396 int recognize(const EST_WFST &wfst,const EST_IList &in,
397  const EST_IList &out,int quite);
398 int recognize_for_perplexity(const EST_WFST &wfst,
399  const EST_StrList &in,
400  int quiet,
401  float &count,
402  float &sumlogp);
403 int recognize_for_perplexity(const EST_WFST &wfst,
404  const EST_IList &in,
405  const EST_IList &out,
406  int quiet,
407  float &count,
408  float &sumlogp);
409 
411 
412 #endif
void set_tag(int v)
Definition: EST_WFST.h:118
EST_TVector< EST_WFST_State * > wfst_state_vector
Definition: EST_WFST.h:121
void set_state(int s)
Definition: EST_WFST.h:80
LISP epsilon_label() const
LISP for on epsilon symbols.
Definition: EST_WFST.h:220
float end(const EST_Item &item)
Definition: EST_item_aux.cc:96
int multistate_index(EST_WFST_MultiStateIndex &i, EST_WFST_MultiState *ms)
void ltscompile(LISP lts_rules, EST_WFST &all_wfst)
Definition: ltscompile.cc:63
int tag() const
Definition: EST_WFST.h:119
void rgcompile(LISP rg, EST_WFST &all_wfst)
Definition: rgcompile.cc:58
int start_state() const
Definition: EST_WFST.h:206
EST_write_status
EST_WFST_MultiState(enum wfst_mstate_type ty)
Definition: EST_WFST.h:138
an internal class for EST_WFST for representing transitions in an WFST
Definition: EST_WFST.h:61
void difference(EST_WFST &a, EST_WFST &b, EST_WFST &c)
a call representing a weighted finite-state transducer
Definition: EST_WFST.h:154
EST_TList< EST_WFST_Transition * > wfst_translist
Definition: EST_WFST.h:83
EST_WFST(const EST_WFST &wfst)
?
Definition: EST_WFST.h:187
bool save(Lattice &lattice, EST_String filename)
int out_epsilon() const
Internal index for output epsilon.
Definition: EST_WFST.h:224
const EST_String & name(const int n) const
The name given the index.
EST_TList< EST_WFST > wfst_list
Definition: EST_WFST.h:371
int transduce(const EST_WFST &wfst, const EST_StrList &in, EST_StrList &out)
int state() const
Definition: EST_WFST.h:76
bool load(Lattice &lattice, EST_String filename)
void determinize(EST_WFST &a, EST_WFST &b)
A specialised hash table for when the key is an EST_String.
Definition: EST_THash.h:304
wfst_mstate_type
Definition: EST_WFST.h:124
EST_WFST_Transition(float w, int s, int i, int o)
Definition: EST_WFST.h:72
void kkcompile(LISP ruleset, EST_WFST &all_wfst)
Definition: kkcompile.cc:72
#define WFST_ERROR_STATE
Definition: EST_WFST.h:52
float weight() const
Definition: EST_WFST.h:75
void intersect(wfst_list &wl, EST_WFST &wfst)
Definition: kkcompile.cc:370
int in_symbol(const EST_String &s) const
Map input symbol to input alphabet index.
Definition: EST_WFST.h:208
EST_TStringHash< int > EST_WFST_MultiStateIndex
Definition: EST_WFST.h:123
const EST_Discrete & out_symbols() const
Accessing the output alphabet.
Definition: EST_WFST.h:235
int cumulate() const
Cumulation condition.
Definition: EST_WFST.h:281
an internal class to EST_WFST used in holding multi-states when determinizing and find the intersecti...
Definition: EST_WFST.h:130
int recognize(const EST_WFST &wfst, const EST_StrList &in, int quiet)
float weight() const
Definition: EST_WFST.h:142
void complement(EST_WFST &a, EST_WFST &b)
const EST_Discrete & in_symbols() const
Accessing the input alphabet.
Definition: EST_WFST.h:233
EST_FVector add(const EST_FVector &a, const EST_FVector &b)
elementwise add
Definition: vec_mat_aux.cc:501
#define VAL_REGISTER_CLASS_DCLS(NAME, CLASS)
Definition: EST_Val_defs.h:44
wfst_state_type
Definition: EST_WFST.h:85
an internal class for EST_WFST used to represent a state in a WFST
Definition: EST_WFST.h:98
void set_weight(float w)
Definition: EST_WFST.h:143
void concat(EST_WFST &a, EST_WFST &b, EST_WFST &c)
int out_symbol(const EST_String &s) const
Map output symbol to output alphabet index.
Definition: EST_WFST.h:214
const EST_WFST_State * state(int i) const
Return internal state information.
Definition: EST_WFST.h:226
const EST_String & in_symbol(int i) const
Map input alphabet index to input symbol.
Definition: EST_WFST.h:211
int in_symbol() const
Definition: EST_WFST.h:77
EST_TVector< T > & copy(EST_TVector< T > a, const EST_TList< T > &in)
f
Definition: EST_item_aux.cc:48
void set_type(wfst_state_type t)
Definition: EST_WFST.h:117
int out_symbol() const
Definition: EST_WFST.h:78
void tlcompile(LISP rg, EST_WFST &all_wfst)
Definition: tlcompile.cc:57
void set_weight(float f)
Definition: EST_WFST.h:79
void minimize(EST_WFST &a, EST_WFST &b)
int length() const
Definition: EST_UList.cc:57
EST_read_status
float start(const EST_Item &item)
Definition: EST_item_aux.cc:52
int in_epsilon() const
Internal index for input epsilon.
Definition: EST_WFST.h:222
LISP rintern(const char *name)
Definition: slib.cc:734
void set_name(int i)
Definition: EST_WFST.h:141
int num_states() const
Definition: EST_WFST.h:205
EST_WFST_State * state_non_const(int i)
Return internal state information (non-const)
Definition: EST_WFST.h:228
void compose(EST_WFST &a, EST_WFST &b, EST_WFST &c)
int name() const
Definition: EST_WFST.h:140
void set_type(enum wfst_mstate_type s)
Definition: EST_WFST.h:144
EST_WFST_Transition(const EST_WFST_Transition &t)
Definition: EST_WFST.h:69
LISP fp
Definition: kkcompile.cc:63
int name() const
Definition: EST_WFST.h:114
void concatenate(EST_WFST &a, EST_WFST &b, EST_WFST &c)
const EST_String & out_symbol(int i) const
Map output alphabet index to output symbol.
Definition: EST_WFST.h:217
wfst_translist transitions
Definition: EST_WFST.h:104
int recognize_for_perplexity(const EST_WFST &wfst, const EST_StrList &in, int quiet, float &count, float &sumlogp)
int num_transitions() const
Definition: EST_WFST.h:115