Edinburgh Speech Tools  2.1-release
tlcompile.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1999 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : September 1999 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* Building tree lexicons from rules. This could almost be done by the */
38 /* regular grammar compiler but for efficiently reasons we have a */
39 /* specific compiler. As the ends of the trees are never minimized */
40 /* Actually it will take full context free grammars and convert them */
41 /* up to a specified rewrite depth */
42 /* */
43 /* Based loosely on "Finite State Machines from Features Grammars" by */
44 /* Black, International Workshop of Parsing Technologies, CMU 89 */
45 /* */
46 /*=======================================================================*/
47 #include <iostream>
48 #include "EST_cutils.h"
49 #include "EST_THash.h"
50 #include "EST_WFST.h"
51 
52 static LISP tl_find_l_w(LISP rules);
53 static int production_index(LISP state,
55  int proposed);
56 
57 void tlcompile(LISP tl, EST_WFST &all_wfst)
58 {
59  // Build a transducer from given tree-lexicon.
60  LISP l_w,letters,words;
61 // LISP sets=siod_nth(2,tl);
62  LISP rules=siod_nth(3,tl);
63 
64  l_w = tl_find_l_w(rules);
65  letters = car(l_w); // or phones or "bits" or whatever
66  words = cdr(l_w); // the things you are indexing
67 
68  all_wfst.build_tree_lex(letters,words,rules);
69 
70 }
71 
72 static LISP tl_find_l_w(LISP rules)
73 {
74  // Find the alphabets used in the rules
75  LISP letters=NIL,words=NIL,r,s;
76 
77  for (r=rules; r != NIL; r=cdr(r))
78  {
79  for (s = car(r); s != NIL; s=cdr(s))
80  {
81  if (streq("->",get_c_string(car(s))))
82  {
83  if (!siod_member_str(get_c_string(car(cdr(s))),words))
84  words = cons(car(cdr(s)),words);
85  break;
86  }
87  else if (!siod_member_str(get_c_string(car(s)),letters))
88  {
89  letters = cons(car(s),letters);
90  }
91  }
92  }
93 
94  return cons(letters,words);
95 }
96 
97 
98 void EST_WFST::build_tree_lex(LISP inalpha, LISP outalpha,
99  LISP wlist)
100 {
101  // Build a determinized tree lexicon for given list
102  LISP w,l;
103  int cs,ns;
104  EST_WFST_Transition *trans;
106  int fff;
107 
108  clear();
109  init(inalpha,outalpha);
110  int i_epsilon = in_epsilon();
111  int o_epsilon = out_epsilon();
112  float weight;
113 
114  // Create a starting state and add it to this WFST
115  p_start_state = add_state(wfst_nonfinal);
116  fff = add_state(wfst_final);
117 
118  for (w=wlist; w; w=cdr(w))
119  {
120 // lprint(car(w));
121  weight = get_c_float(car(siod_last(car(w))));
122  for (cs=p_start_state,l=car(w); l; l=cdr(l))
123  {
124  if (streq("->",get_c_string(car(l))))
125  {
126  // reached word
127  trans = find_transition(cs,i_epsilon,
128  p_out_symbols.name(get_c_string(car(cdr(l)))));
129  if (trans == 0)
130  {
131  p_states[cs]
132  ->add_transition(weight,
133  fff,i_epsilon,
134  p_out_symbols.name(get_c_string(car(cdr(l)))));
135  }
136 // else // duplicate word
137 // {
138 // cerr << "WFST: tlcompile, duplicate word ignored\n";
139 // }
140  break;
141  }
142  else
143  {
144  trans = find_transition(cs,
145  p_in_symbols.name(get_c_string(car(l))),
146  o_epsilon);
147  if (trans == 0)
148  {
149  ns = production_index(cdr(l),index,p_num_states);
150  if (ns == p_num_states)
151  ns = add_state(wfst_nonfinal);
152  p_states[cs]
153  ->add_transition(weight,
154  ns,
155  p_in_symbols.name(get_c_string(car(l))),
156  o_epsilon);
157  }
158  else // increment count
159  {
160  ns = trans->state();
161  trans->set_weight(trans->weight()+weight);
162  }
163  }
164  cs = ns;
165  }
166  }
167 
168  // normalized transition weights into probabilities
169  stop_cumulate();
170 }
171 
172 static int production_index(LISP state,
174  int proposed)
175 {
176  // Returns proposed if ms is not already in index, otherwise
177  // returns the value that was proposed when it was first new.
178 
179  // I'll have to make this more efficient in future.
180  EST_String istring("");
181  LISP p;
182  int ns,found;
183 
184  for (p=state; p != NIL; p = cdr(p))
185  istring += EST_String(get_c_string(car(p))) + " ";
186 
187  ns = index.val(istring,found);
188  if (found)
189  return ns;
190  else
191  {
192  index.add_item(istring,proposed);
193  return proposed;
194  }
195 }
LISP siod_last(LISP list)
Definition: siod.cc:258
float get_c_float(LISP x)
Definition: slib.cc:1858
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
Definition: EST_WFST.cc:669
an internal class for EST_WFST for representing transitions in an WFST
Definition: EST_WFST.h:61
a call representing a weighted finite-state transducer
Definition: EST_WFST.h:154
void clear()
clear removing existing states if any
Definition: EST_WFST.cc:117
int out_epsilon() const
Internal index for output epsilon.
Definition: EST_WFST.h:224
const EST_String & name(const int n) const
The name given the index.
#define NIL
Definition: siod_defs.h:92
int state() const
Definition: EST_WFST.h:76
A specialised hash table for when the key is an EST_String.
Definition: EST_THash.h:304
#define streq(X, Y)
Definition: EST_cutils.h:57
float weight() const
Definition: EST_WFST.h:75
int index(EST_TList< T > &l, T &val, bool(*eq)(const EST_UItem *, const EST_UItem *)=NULL)
Definition: EST_TList.h:286
LISP siod_nth(int nth, LISP list)
Definition: siod.cc:214
void tlcompile(LISP tl, EST_WFST &all_wfst)
Definition: tlcompile.cc:57
void stop_cumulate()
Stop cumulation and calculate probabilities on transitions.
Definition: EST_WFST.cc:702
const char * get_c_string(LISP x)
Definition: slib.cc:638
LISP cons(LISP x, LISP y)
Definition: slib_list.cc:97
V & val(const EST_String &key, int &found) const
void build_tree_lex(LISP inalpha, LISP outalpha, LISP wlist)
Definition: tlcompile.cc:98
const EST_WFST_State * state(int i) const
Return internal state information.
Definition: EST_WFST.h:226
section options Options< strong > or ngram_per_line Pseudo words
int add_item(const EST_String &key, const V &value, int no_search=0)
Add an entry to the table.
void set_weight(float f)
Definition: EST_WFST.h:79
int in_epsilon() const
Internal index for input epsilon.
Definition: EST_WFST.h:222
EST_WFST_Transition * find_transition(int state, int in, int out) const
Find (first) transition given in and out symbols.
Definition: EST_WFST.cc:271
LISP car(LISP x)
Definition: slib_list.cc:115
void init(int init_num_states=10)
Clear with (estimation of number of states required)
Definition: EST_WFST.cc:149
EST_String
LISP siod_member_str(const char *key, LISP list)
Definition: siod.cc:167
LISP cdr(LISP x)
Definition: slib_list.cc:124